In [1]:
import numpy
print("NumPy version:", numpy.__version__)

NumPy version: 2.0.1


In [2]:
import sys
print(sys.executable)

/home/hice1/cxu371/scratch/miniconda3/envs/mlg_fix/bin/python3


In [3]:
!which python

/usr/local/pace-apps/manual/packages/anaconda3/2023.03/bin/python


In [4]:
!which pip

/usr/local/pace-apps/manual/packages/anaconda3/2023.03/bin/pip


In [5]:
from torch_geometric.llm import RAGQueryLoader

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from torch_geometric.llm.models import (
    LLM,
    TXT2KG,
    GRetriever,
    LLMJudge,
    SentenceTransformer,
)
from torch_geometric.llm.models.txt2kg import _chunk_text
from torch_geometric.llm.utils.backend_utils import (
    create_graph_from_triples,
    create_remote_backend_from_graph_data,
    make_pcst_filter,
    preprocess_triplet,
)
from torch_geometric.llm.utils.feature_store import KNNRAGFeatureStore
from torch_geometric.llm.utils.graph_store import NeighborSamplingRAGGraphStore
from torch_geometric.llm.utils.vectorrag import DocumentRetriever
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GAT, SGFormer

In [7]:
import torch
from torch_geometric.data import Data

# ---- Load the graph ----
# graph_path = "./final_graph.pt"
# print(f"🔍 Loading graph from: {graph_path}")
# graph = torch.load(graph_path, weights_only=False)  # keeps full Data object
# print("✅ Graph loaded successfully!\n")

In [7]:
print("=== GRAPH SUMMARY ===")
print(graph)
print(f"Num nodes: {graph.num_nodes}")
print(f"Num edges: {graph.num_edges}")
print(f"Node feature dim: {graph.x.size(1) if hasattr(graph, 'x') else 'N/A'}")
print(f"Label dim: {graph.y.size(1) if hasattr(graph, 'y') else 'N/A'}")

# ---- Split masks ----
for mask_name in ["train_mask", "val_mask", "test_mask"]:
    if hasattr(graph, mask_name):
        mask = getattr(graph, mask_name)
        print(f"{mask_name}: {mask.sum().item()} nodes ({mask.sum().item()/graph.num_nodes:.2%})")

# ---- Edge inspection ----
print("\n=== Example Edges (src → dst) ===")
edge_index = graph.edge_index
if isinstance(edge_index, tuple) and len(edge_index) == 2:
    src, dst = edge_index
else:
    src, dst = edge_index[0], edge_index[1]
for i in range(min(10, src.size(0))):
    print(f"{src[i].item()} → {dst[i].item()}")

# ---- Edge attributes ----
if hasattr(graph, "edge_attr") and graph.edge_attr is not None:
    print("\n=== Edge Attributes (first 3) ===")
    print(graph.edge_attr[:3])
    print(f"Edge attr shape: {graph.edge_attr.shape}")

# ---- Node features ----
if hasattr(graph, "x"):
    print("\n=== Node Features (first 3) ===")
    print(graph.x[:3])
    print(f"Feature tensor shape: {graph.x.shape}")

# ---- Node labels ----
if hasattr(graph, "y"):
    print("\n=== Node Labels (first 3, nonzero label indices) ===")
    for i in range(3):
        label_indices = torch.nonzero(graph.y[i]).flatten().tolist()
        print(f"Node {i} labels → {label_indices}")

# ---- Node IDs (if exist) ----
if hasattr(graph, "node_id"):
    print("\n=== Node IDs (first 10) ===")
    print(graph.node_id[:10])

# ---- Basic graph stats ----
print("\n=== Structural Stats ===")
degrees = torch.bincount(graph.edge_index[0], minlength=graph.num_nodes)
print(f"Average node degree: {degrees.float().mean():.2f}")
print(f"Max node degree: {degrees.max().item()}")
print(f"Graph density: {graph.num_edges / (graph.num_nodes ** 2):.8f}")

# ---- Memory footprint ----
if hasattr(graph, "x"):
    feature_mem_mb = graph.x.numel() * graph.x.element_size() / 1024**2
    print(f"Approx. feature matrix size: {feature_mem_mb:.2f} MB")


=== GRAPH SUMMARY ===
Graph(x=[107694, 822], y=[107694, 54], train_mask=[107694], val_mask=[107694], test_mask=[107694], edge_index=[2, 124056], edge_attr=[124056])
Num nodes: 107694
Num edges: 124056
Node feature dim: 822
Label dim: 54
train_mask: 97539 nodes (90.57%)
val_mask: 7953 nodes (7.38%)
test_mask: 659 nodes (0.61%)

=== Example Edges (src → dst) ===
0 → 34
0 → 57
0 → 222
1 → 258
1 → 47
1 → 324
1 → 723
1 → 724
1 → 2151
1 → 2182

=== Edge Attributes (first 3) ===
tensor([0, 3, 3])
Edge attr shape: torch.Size([124056])

=== Node Features (first 3) ===
tensor([[ 0.0458,  0.0293, -0.0242,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0130, -0.0175,  0.0066,  ...,  0.0000,  0.9181,  0.0000],
        [-0.0117, -0.0524,  0.0065,  ...,  0.0000,  0.8182,  0.0000]])
Feature tensor shape: torch.Size([107694, 822])

=== Node Labels (first 3, nonzero label indices) ===
Node 0 labels → [19]
Node 1 labels → [0]
Node 2 labels → [0]

=== Structural Stats ===
Average node degree: 1.15
Max node

In [8]:
import pandas as pd
import psutil

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 0)

# --- Check available memory ---
mem_gb = psutil.virtual_memory().available / 1024**3
print(f"🧠 Available memory before load: {mem_gb:.1f} GB")

# --- Load the pickle ---
path = "./nodes_df.pkl"
print(f"📂 Loading {path} ... this may take a few minutes if large.")
nodes_df = pd.read_pickle(path)

print("\n✅ Loaded successfully!")
print(f"Shape: {nodes_df.shape}")
print(f"Memory usage: {nodes_df.memory_usage(deep=True).sum() / 1024**3:.2f} GB")

# --- Inspect structure ---
print("\n=== Columns ===")
print(nodes_df.columns.tolist())

print("\n=== Info ===")
print(nodes_df.info(memory_usage='deep'))

# # --- Show first few rows ---
# print("\n=== Sample Rows (first 5) ===")
# display(nodes_df.head(5))

🧠 Available memory before load: 170.2 GB
📂 Loading ./nodes_df.pkl ... this may take a few minutes if large.

✅ Loaded successfully!
Shape: (389575, 7)
Memory usage: 3.08 GB

=== Columns ===
['id', 'description', 'createdAt', 'type', 'y_multi_lab', 'relationships', 'y']

=== Info ===
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 389575 entries, 0 to 389574
Data columns (total 7 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   id             389575 non-null  object
 1   description    389575 non-null  object
 2   createdAt      389575 non-null  object
 3   type           389575 non-null  object
 4   y_multi_lab    389575 non-null  object
 5   relationships  389575 non-null  object
 6   y              389575 non-null  object
dtypes: object(7)
memory usage: 3.1 GB
None


In [9]:
# pd.set_option('display.max_columns', 200)
# pd.set_option('display.max_colwidth', 200)
# pd.set_option('display.width', 200)

display(nodes_df.head(5))

Unnamed: 0,id,description,createdAt,type,y_multi_lab,relationships,y
0,tencent/SRPO,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",2025-09-08T12:44:15+00:00,model,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,baidu/ERNIE-4.5-21B-A3B-Thinking,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,model,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,Qwen/Qwen3-Next-80B-A3B-Instruct,"---\nlibrary_name: transformers\nlicense: apache-2.0\nlicense_link: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct/blob/main/LICENSE\npipeline_tag: text-generation\n---\n\n# Qwen3-Next-80B-A3B-Instruct\n<a href=""https://chat.qwen.ai/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5"" style=""display: inline-block; vertical-align: middle;""/>\n</a>\n\nOver the past few months, we have observed increasingly clear trends toward scaling both total parameters and context lengths in the pursuit of more powerful and agentic artificial intelligence (AI). \nWe are excited to share our latest advancements in addressing these demands, centered on improving scaling efficiency through innovative model architecture. \nWe call this next-generation foundation models **Qwen3-Next**.\n\n## Highlights\n\n**Qwen3-Next-80B-A3B** is the first installment in the Qwen3-Next series and features the following key enchancements:\n- **Hybrid Attention**: Replaces standard attention with the combination of **Gated DeltaNet** and **Gated Attention**, enabling efficient context modeling for ultra-long context length.\n- **High-Sparsity Mixture-of-Experts (MoE)**: Achieves an extreme low activation ratio in MoE layers, drastically reducing FLOPs per token while preserving model capacity. \n- **Stability Optimizations**: Includes techniques such as **zero-centered and weight-decayed layernorm**, and other stabilizing enhancements for robust pre-training and post-training. \n- **Multi-Token Prediction (MTP)**: Boosts pretraining model performance and accelerates inference.\n\nWe are seeing strong performance in terms of both parameter efficiency and inference speed for Qwen3-Next-80B-A3B:\n- Qwen3-Next-80B-A3B-Base outperforms Qwen3-32B-Base on downstream tasks with 10% of the total training cost and with 10 times inference throughput for context over 32K tokens.\n- Qwen3-Next-80B-A3B-Instruct performs on par with Qwen3-235B-A22B-Instruct-2507 on certain benchmarks, while demonstrating significant advantages in handling ultra-long-context tasks up to 256K tokens.\n\n![Qwen3-Next-80B-A3B-Instruct Benchmark Comparison](https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-Next/Qwen3-Next-80B-A3B-Instruct.001.jpeg)\n\nFor more details, please refer to our blog post [Qwen3-Next](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list).\n\n## Model Overview\n\n> [!Note]\n> **Qwen3-Next-80B-A3B-Instruct** supports only instruct (non-thinking) mode and does not generate ``<think></think>`` blocks in its output.\n\n**Qwen3-Next-80B-A3B-Instruct** has the following features:\n- Type: Causal Language Models\n- Training Stage: Pretraining (15T tokens) & Post-training\n- Number of Parameters: 80B in total and 3B activated\n- Number of Paramaters (Non-Embedding): 79B\n- Hidden Dimension: 2048\n- Number of Layers: 48\n - Hybrid Layout: 12 \* (3 \* (Gated DeltaNet -> MoE) -> 1 \* (Gated Attention -> MoE))\n- Gated Attention:\n - Number of Attention Heads: 16 for Q and 2 for KV\n - Head Dimension: 256\n - Rotary Position Embedding Dimension: 64\n- Gated DeltaNet:\n - Number of Linear Attention Heads: 32 for V and 16 for QK\n - Head Dimension: 128\n- Mixture of Experts:\n - Number of Experts: 512\n - Number of Activated Experts: 10\n - Number of Shared Experts: 1\n - Expert Intermediate Dimension: 512\n- Context Length: 262,144 natively and extensible up to 1,010,000 tokens\n\n<img src=""https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-Next/model_architecture.png"" height=""384px"" title=""Qwen3-Next Model Architecture"" />\n\n\n## Performance\n\n| | Qwen3-30B-A3B-Instruct-2507 | Qwen3-32B Non-Thinking | Qwen3-235B-A22B-Instruct-2507 | Qwen3-Next-80B-A3B-Instruct |\n|--- | --- | --- | --- | --- |\n| **Knowledge** | | | | |\n| MMLU-Pro | 78.4 | 71.9 | **83.0** | 80.6 |\n| MMLU-Redux | 89.3 | 85.7 | **93.1** | 90.9 |\n| GPQA | 70.4 | 54.6 | **77.5** | 72.9 |\n| SuperGPQA | 53.4 | 43.2 | **62.6** | 58.8 |\n| **Reasoning** | | | | |\n| AIME25 | 61.3 | 20.2 | **70.3** | 69.5 |\n| HMMT25 | 43.0 | 9.8 | **55.4** | 54.1 |\n| LiveBench 20241125 | 69.0 | 59.8 | 75.4 | **75.8** |\n| **Coding** | | | | |\n| LiveCodeBench v6 (25.02-25.05) | 43.2 | 29.1 | 51.8 | **56.6** |\n| MultiPL-E | 83.8 | 76.9 | **87.9** | 87.8 |\n| Aider-Polyglot | 35.6 | 40.0 | **57.3** | 49.8 |\n| **Alignment** | | | | |\n| IFEval | 84.7 | 83.2 | **88.7** | 87.6 |\n| Arena-Hard v2* | 69.0 | 34.1 | 79.2 | **82.7** |\n| Creative Writing v3 | 86.0 | 78.3 | **87.5** | 85.3 |\n| WritingBench | 85.5 | 75.4 | 85.2 | **87.3** |\n| **Agent** | | | | |\n| BFCL-v3 | 65.1 | 63.0 | **70.9** | 70.3 |\n| TAU1-Retail | 59.1 | 40.1 | **71.3** | 60.9 |\n| TAU1-Airline | 40.0 | 17.0 | **44.0** | 44.0 |\n| TAU2-Retail | 57.0 | 48.8 | **74.6** | 57.3 |\n| TAU2-Airline | 38.0 | 24.0 | **50.0** | 45.5 |\n| TAU2-Telecom | 12.3 | 24.6 | **32.5** | 13.2 |\n| **Multilingualism** | | | | |\n| MultiIF | 67.9 | 70.7 | **77.5** | 75.8 |\n| MMLU-ProX | 72.0 | 69.3 | **79.4** | 76.7 |\n| INCLUDE | 71.9 | 70.9 | **79.5** | 78.9 |\n| PolyMATH | 43.1 | 22.5 | **50.2** | 45.9 |\n\n*: For reproducibility, we report the win rates evaluated by GPT-4.1.\n\n## Quickstart\n\nThe code for Qwen3-Next has been merged into the main branch of Hugging Face `transformers`.\n\n```shell\npip install git+https://github.com/huggingface/transformers.git@main\n```\n\nWith earlier versions, you will encounter the following error:\n```\nKeyError: 'qwen3_next'\n```\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs. \n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""Qwen/Qwen3-Next-80B-A3B-Instruct""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n dtype=""auto"",\n device_map=""auto"",\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt},\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True,\n)\nmodel_inputs = tokenizer([text], return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=16384,\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() \n\ncontent = tokenizer.decode(output_ids, skip_special_tokens=True)\n\nprint(""content:"", content)\n```\n\n> [!Note]\n> Multi-Token Prediction (MTP) is not generally available in Hugging Face Transformers.\n\n> [!Note]\n> The efficiency or throughput improvement depends highly on the implementation.\n> It is recommended to adopt a dedicated inference framework, e.g., SGLang and vLLM, for inference tasks.\n\n> [!Tip]\n> Depending on the inference settings, you may observe better efficiency with [`flash-linear-attention`](https://github.com/fla-org/flash-linear-attention#installation) and [`causal-conv1d`](https://github.com/Dao-AILab/causal-conv1d).\n> See the links for detailed instructions and requirements.\n\n\n## Deployment\n\nFor deployment, you can use the latest `sglang` or `vllm` to create an OpenAI-compatible API endpoint.\n\n### SGLang\n\n[SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models.\nSGLang could be used to launch a server with OpenAI-compatible API service. \n\n`sglang>=0.5.2` is required for Qwen3-Next, which can be installed using:\n```shell\npip install 'sglang[all]>=0.5.2'\n```\nSee [its documentation](https://docs.sglang.ai/get_started/install.html) for more details.\n\nThe following command can be used to create an API endpoint at `http://localhost:30000/v1` with maximum context length 256K tokens using tensor parallel on 4 GPUs.\n```shell\npython -m sglang.launch_server --model-path Qwen/Qwen3-Next-80B-A3B-Instruct --port 30000 --tp-size 4 --context-length 262144 --mem-fraction-static 0.8\n```\n\nThe following command is recommended for MTP with the rest settings the same as above:\n```shell\npython -m sglang.launch_server --model-path Qwen/Qwen3-Next-80B-A3B-Instruct --port 30000 --tp-size 4 --context-length 262144 --mem-fraction-static 0.8 --speculative-algo NEXTN --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4\n```\n\n> [!Note]\n> The default context length is 256K. Consider reducing the context length to a smaller value, e.g., `32768`, if the server fails to start.\n\nPlease also refer to SGLang's usage guide on [Qwen3-Next](https://docs.sglang.ai/basic_usage/qwen3.html).\n\n### vLLM\n\n[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs.\nvLLM could be used to launch a server with OpenAI-compatible API service. \n\n`vllm>=0.10.2` is required for Qwen3-Next, which can be installed using:\n```shell\npip install 'vllm>=0.10.2'\n```\nSee [its documentation](https://docs.vllm.ai/en/stable/getting_started/installation/index.html) for more details.\n\nThe following command can be used to create an API endpoint at `http://localhost:8000/v1` with maximum context length 256K tokens using tensor parallel on 4 GPUs.\n```shell\nvllm serve Qwen/Qwen3-Next-80B-A3B-Instruct --port 8000 --tensor-parallel-size 4 --max-model-len 262144\n```\n\nThe following command is recommended for MTP with the rest settings the same as above:\n```shell\nvllm serve Qwen/Qwen3-Next-80B-A3B-Instruct --port 8000 --tensor-parallel-size 4 --max-model-len 262144 --speculative-config '{""method"":""qwen3_next_mtp"",""num_speculative_tokens"":2}'\n```\n\n> [!Note]\n> The default context length is 256K. Consider reducing the context length to a smaller value, e.g., `32768`, if the server fails to start.\n\nPlease also refer to vLLM's usage guide on [Qwen3-Next](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3-Next.html).\n\n## Agentic Use\n\nQwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.\n\nTo define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.\n```python\nfrom qwen_agent.agents import Assistant\n\n# Define LLM\nllm_cfg = {\n 'model': 'Qwen3-Next-80B-A3B-Instruct',\n\n # Use a custom endpoint compatible with OpenAI API:\n 'model_server': 'http://localhost:8000/v1', # api_base\n 'api_key': 'EMPTY',\n}\n\n# Define Tools\ntools = [\n {'mcpServers': { # You can specify the MCP configuration file\n 'time': {\n 'command': 'uvx',\n 'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']\n },\n ""fetch"": {\n ""command"": ""uvx"",\n ""args"": [""mcp-server-fetch""]\n }\n }\n },\n 'code_interpreter', # Built-in tools\n]\n\n# Define Agent\nbot = Assistant(llm=llm_cfg, function_list=tools)\n\n# Streaming generation\nmessages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]\nfor responses in bot.run(messages=messages):\n pass\nprint(responses)\n```\n\n\n## Processing Ultra-Long Texts\n\nQwen3-Next natively supports context lengths of up to 262,144 tokens. \nFor conversations where the total length (including both input and output) significantly exceeds this limit, we recommend using RoPE scaling techniques to handle long texts effectively. \nWe have validated the model's performance on context lengths of up to 1 million tokens using the [YaRN](https://arxiv.org/abs/2309.00071) method.\n\nYaRN is currently supported by several inference frameworks, e.g., `transformers`, `vllm` and `sglang`. \nIn general, there are two approaches to enabling YaRN for supported frameworks:\n\n- Modifying the model files:\n In the `config.json` file, add the `rope_scaling` fields:\n ```json\n {\n ...,\n ""rope_scaling"": {\n ""rope_type"": ""yarn"",\n ""factor"": 4.0,\n ""original_max_position_embeddings"": 262144\n }\n }\n ```\n\n- Passing command line arguments:\n\n For `vllm`, you can use\n ```shell\n VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve ... --rope-scaling '{""rope_type"":""yarn"",""factor"":4.0,""original_max_position_embeddings"":262144}' --max-model-len 1010000 \n ```\n\n For `sglang`, you can use\n ```shell\n SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python -m sglang.launch_server ... --json-model-override-args '{""rope_scaling"":{""rope_type"":""yarn"",""factor"":4.0,""original_max_position_embeddings"":262144}}' --context-length 1010000\n ```\n\n> [!NOTE]\n> All the notable open-source frameworks implement static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.**\n> We advise adding the `rope_scaling` configuration only when processing long contexts is required. \n> It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 524,288 tokens, it would be better to set `factor` as 2.0. \n\n#### Long-Context Performance\n\nWe test the model on an 1M version of the [RULER](https://arxiv.org/abs/2404.06654) benchmark.\n\n| Model Name | Acc avg | 4k | 8k | 16k | 32k | 64k | 96k | 128k | 192k | 256k | 384k | 512k | 640k | 768k | 896k | 1000k |\n|---------------------------------------------|---------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|-------|\n| Qwen3-30B-A3B-Instruct-2507 | 86.8 | 98.0 | 96.7 | 96.9 | 97.2 | 93.4 | 91.0 | 89.1 | 89.8 | 82.5 | 83.6 | 78.4 | 79.7 | 77.6 | 75.7 | 72.8 |\n| Qwen3-235B-A22B-Instruct-2507 | 92.5 | 98.5 | 97.6 | 96.9 | 97.3 | 95.8 | 94.9 | 93.9 | 94.5 | 91.0 | 92.2 | 90.9 | 87.8 | 84.8 | 86.5 | 84.5 |\n| Qwen3-Next-80B-A3B-Instruct | 91.8 | 98.5 | 99.0 | 98.0 | 98.7 | 97.6 | 95.0 | 96.0 | 94.0 | 93.5 | 91.7 | 86.9 | 85.5 | 81.7 | 80.3 | 80.3 |\n\n* Qwen3-Next are evaluated with YaRN enabled. Qwen3-2507 models are evaluated with Dual Chunk Attention enabled.\n* Since the evaluation is time-consuming, we use 260 samples for each length (13 sub-tasks, 20 samples for each).\n\n## Best Practices\n\nTo achieve optimal performance, we recommend the following settings:\n\n1. **Sampling Parameters**:\n - We suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`.\n - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.\n\n2. **Adequate Output Length**: We recommend using an output length of 16,384 tokens for most queries, which is adequate for instruct models.\n\n3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\n - **Math Problems**: Include ""Please reason step by step, and put your final answer within \boxed{}."" in the prompt.\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: ""Please show your choice in the `answer` field with only the choice letter, e.g., `""answer"": ""C""`.""\n\n### Citation\n\nIf you find our work helpful, feel free to give us a cite.\n\n```\n@misc{qwen3technicalreport,\n title={Qwen3 Technical Report}, \n author={Qwen Team},\n year={2025},\n eprint={2505.09388},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2505.09388}, \n}\n\n@article{qwen2.5-1m,\n title={Qwen2.5-1M Technical Report}, \n author={An Yang and Bowen Yu and Chengyuan Li and Dayiheng Liu and Fei Huang and Haoyan Huang and Jiandong Jiang and Jianhong Tu and Jianwei Zhang and Jingren Zhou and Junyang Lin and Kai Dang and Kexin Yang and Le Yu and Mei Li and Minmin Sun and Qin Zhu and Rui Men and Tao He and Weijia Xu and Wenbiao Yin and Wenyuan Yu and Xiafei Qiu and Xingzhang Ren and Xinlong Yang and Yong Li and Zhiying Xu and Zipeng Zhang},\n journal={arXiv preprint arXiv:2501.15383},\n year={2025}\n}\n```",2025-09-09T15:40:56+00:00,model,[0],"model_finetune_model:unsloth/Qwen3-Next-80B-A3B-Instruct, tiny-random/qwen3-next-moe, kikekewl/Qwen3-Next-80B-A3B-mlx-bf16, model_quantized_model:cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-4bit, unsloth/Qwen3-Next-80B-A3B-Instruct-bnb-4bit, mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit, Intel/Qwen3-Next-80B-A3B-Instruct-int4-mixed-AutoRound, nightmedia/Qwen3-Next-80B-A3B-Instruct-q2-mlx, nightmedia/Qwen3-Next-80B-A3B-Instruct-mxfp4-mlx, DevQuasar/Qwen.Qwen3-Next-80B-A3B-Instruct-FP8-Dynamic, TheClusterDev/Qwen3-Next-80B-A3B-Instruct-FP8, nightmedia/Qwen3-Next-80B-A3B-Instruct-qx86-hi-mlx, mlx-community/Qwen3-Next-80B-A3B-Instruct-5bit, mlx-community/Qwen3-Next-80B-A3B-Instruct-6bit, mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit, TheClusterDev/Qwen3-Next-80B-A3B-Instruct-FP8-Dynamic","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
3,Qwen/Qwen3-Next-80B-A3B-Thinking,"---\nlibrary_name: transformers\nlicense: apache-2.0\nlicense_link: https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking/blob/main/LICENSE\npipeline_tag: text-generation\n---\n\n# Qwen3-Next-80B-A3B-Thinking\n<a href=""https://chat.qwen.ai/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5"" style=""display: inline-block; vertical-align: middle;""/>\n</a>\n\nOver the past few months, we have observed increasingly clear trends toward scaling both total parameters and context lengths in the pursuit of more powerful and agentic artificial intelligence (AI). \nWe are excited to share our latest advancements in addressing these demands, centered on improving scaling efficiency through innovative model architecture. \nWe call this next-generation foundation models **Qwen3-Next**.\n\n## Highlights\n\n**Qwen3-Next-80B-A3B** is the first installment in the Qwen3-Next series and features the following key enchancements:\n- **Hybrid Attention**: Replaces standard attention with the combination of **Gated DeltaNet** and **Gated Attention**, enabling efficient context modeling for ultra-long context length.\n- **High-Sparsity Mixture-of-Experts (MoE)**: Achieves an extreme low activation ratio in MoE layers, drastically reducing FLOPs per token while preserving model capacity. \n- **Stability Optimizations**: Includes techniques such as **zero-centered and weight-decayed layernorm**, and other stabilizing enhancements for robust pre-training and post-training. \n- **Multi-Token Prediction (MTP)**: Boosts pretraining model performance and accelerates inference.\n\nWe are seeing strong performance in terms of both parameter efficiency and inference speed for Qwen3-Next-80B-A3B:\n- Qwen3-Next-80B-A3B-Base outperforms Qwen3-32B-Base on downstream tasks with 10% of the total training cost and with 10 times inference throughput for context over 32K tokens.\n- Leveraging [GSPO](https://qwenlm.github.io/blog/gspo/), we have addressed the stability and efficiency challenges posed by the hybrid attention mechanism combined with a high-sparsity MoE architecture in RL training. \n Qwen3-Next-80B-A3B-Thinking demonstrates outstanding performance on complex reasoning tasks, not only **surpassing Qwen3-30B-A3B-Thinking-2507 and Qwen3-32B-Thinking**, but also **outperforming the proprietary model Gemini-2.5-Flash-Thinking** across multiple benchmarks.\n\n![Qwen3-Next-80B-A3B-Thinking Benchmark Comparison](https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-Next/Qwen3-Next-80B-A3B-Thinking.001.jpeg)\n\nFor more details, please refer to our blog post [Qwen3-Next](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list).\n\n## Model Overview\n\n> [!Note]\n> **Qwen3-Next-80B-A3B-Thinking** supports only thinking mode. \n> To enforce model thinking, the default chat template automatically includes `<think>`. \n> Therefore, it is normal for the model's output to contain only `</think>` without an explicit opening `<think>` tag.\n\n> [!Note]\n> **Qwen3-Next-80B-A3B-Thinking** may generate thinking content longer than its predecessor.\n> We strongly recommend its use in highly complex reasoning tasks.\n\n\n**Qwen3-Next-80B-A3B-Thinking** has the following features:\n- Type: Causal Language Models\n- Training Stage: Pretraining (15T tokens) & Post-training\n- Number of Parameters: 80B in total and 3B activated\n- Number of Paramaters (Non-Embedding): 79B\n- Hidden Dimension: 2048\n- Number of Layers: 48\n - Hybrid Layout: 12 \* (3 \* (Gated DeltaNet -> MoE) -> 1 \* (Gated Attention -> MoE))\n- Gated Attention:\n - Number of Attention Heads: 16 for Q and 2 for KV\n - Head Dimension: 256\n - Rotary Position Embedding Dimension: 64\n- Gated DeltaNet:\n - Number of Linear Attention Heads: 32 for V and 16 for QK\n - Head Dimension: 128\n- Mixture of Experts:\n - Number of Experts: 512\n - Number of Activated Experts: 10\n - Number of Shared Experts: 1\n - Expert Intermediate Dimension: 512\n- Context Length: 262,144 natively and extensible up to 1,010,000 tokens\n\n<img src=""https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3-Next/model_architecture.png"" height=""384px"" title=""Qwen3-Next Model Architecture"" />\n\n\n## Performance\n\n| | Qwen3-30B-A3B-Thinking-2507 | Qwen3-32B Thinking | Qwen3-235B-A22B-Thinking-2507 | Gemini-2.5-Flash Thinking | Qwen3-Next-80B-A3B-Thinking |\n|--- | --- | --- | --- | --- | --- |\n| **Knowledge** | | | | |\n| MMLU-Pro | 80.9 | 79.1 | **84.4** | 81.9 | 82.7 |\n| MMLU-Redux | 91.4 | 90.9 | **93.8** | 92.1 | 92.5 |\n| GPQA | 73.4 | 68.4 | 81.1 | **82.8** | 77.2 |\n| SuperGPQA | 56.8 | 54.1 | **64.9** | 57.8 | 60.8 |\n| **Reasoning** | | | | |\n| AIME25 | 85.0 | 72.9 | **92.3** | 72.0 | 87.8 |\n| HMMT25 | 71.4 | 51.5 | **83.9** | 64.2 | 73.9 |\n| LiveBench 241125 | 76.8 | 74.9 | **78.4** | 74.3 | 76.6 |\n| **Coding** | | | | |\n| LiveCodeBench v6 (25.02-25.05) | 66.0 | 60.6 | **74.1** | 61.2 | 68.7 |\n| CFEval | 2044 | 1986 | **2134** | 1995 | 2071 |\n| OJBench | 25.1 | 24.1 | **32.5** | 23.5 | 29.7 |\n| **Alignment** | | | | |\n| IFEval | 88.9 | 85.0 | 87.8 | **89.8** | 88.9 |\n| Arena-Hard v2* | 56.0 | 48.4 | **79.7** | 56.7 | 62.3 |\n| WritingBench | 85.0 | 79.0 | **88.3** | 83.9 | 84.6 |\n| **Agent** | | | | |\n| BFCL-v3 | **72.4** | 70.3 | 71.9 | 68.6 | 72.0 |\n| TAU1-Retail | 67.8 | 52.8 | 67.8 | 65.2 | **69.6** |\n| TAU1-Airline | 48.0 | 29.0 | 46.0 | **54.0** | 49.0 |\n| TAU2-Retail | 58.8 | 49.7 | **71.9** | 66.7 | 67.8 |\n| TAU2-Airline | 58.0 | 45.5 | 58.0 | 52.0 | **60.5** |\n| TAU2-Telecom | 26.3 | 27.2 | **45.6** | 31.6 | 43.9 |\n| **Multilingualism** | | | | |\n| MultiIF | 76.4 | 73.0 | **80.6** | 74.4 | 77.8 |\n| MMLU-ProX | 76.4 | 74.6 | **81.0** | 80.2 | 78.7 |\n| INCLUDE | 74.4 | 73.7 | 81.0 | **83.9** | 78.9 |\n| PolyMATH | 52.6 | 47.4 | **60.1** | 49.8 | 56.3 |\n\n*: For reproducibility, we report the win rates evaluated by GPT-4.1.\n\n## Quickstart\n\nThe code for Qwen3-Next has been merged into the main branch of Hugging Face `transformers`. \n\n```shell\npip install git+https://github.com/huggingface/transformers.git@main\n```\n\nWith earlier versions, you will encounter the following error:\n```\nKeyError: 'qwen3_next'\n```\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs. \n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""Qwen/Qwen3-Next-80B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n dtype=""auto"",\n device_map=""auto""\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt},\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True,\n)\nmodel_inputs = tokenizer([text], return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=32768,\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist() \n\n# parsing thinking content\ntry:\n # rindex finding 151668 (</think>)\n index = len(output_ids) - output_ids[::-1].index(151668)\nexcept ValueError:\n index = 0\n\nthinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip(""\n"")\ncontent = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip(""\n"")\n\nprint(""thinking content:"", thinking_content) # no opening <think> tag\nprint(""content:"", content)\n```\n\n> [!Note]\n> Multi-Token Prediction (MTP) is not generally available in Hugging Face Transformers.\n\n> [!Note]\n> The efficiency or throughput improvement depends highly on the implementation.\n> It is recommended to adopt a dedicated inference framework, e.g., SGLang and vLLM, for inference tasks.\n\n> [!Tip]\n> Depending on the inference settings, you may observe better efficiency with [`flash-linear-attention`](https://github.com/fla-org/flash-linear-attention#installation) and [`causal-conv1d`](https://github.com/Dao-AILab/causal-conv1d).\n> See the links for detailed instructions and requirements.\n\n## Deployment\n\nFor deployment, you can use the latest `sglang` or `vllm` to create an OpenAI-compatible API endpoint.\n\n### SGLang\n\n[SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models.\nSGLang could be used to launch a server with OpenAI-compatible API service. \n\n`sglang>=0.5.2` is required for Qwen3-Next, which can be installed using:\n```shell\npip install 'sglang[all]>=0.5.2'\n```\nSee [its documentation](https://docs.sglang.ai/get_started/install.html) for more details.\n\nThe following command can be used to create an API endpoint at `http://localhost:30000/v1` with maximum context length 256K tokens using tensor parallel on 4 GPUs.\n```shell\npython -m sglang.launch_server --model-path Qwen/Qwen3-Next-80B-A3B-Thinking --port 30000 --tp-size 4 --context-length 262144 --reasoning-parser deepseek-r1 --mem-fraction-static 0.8\n```\n\nThe following command is recommended for MTP with the rest settings the same as above:\n```shell\npython -m sglang.launch_server --model-path Qwen/Qwen3-Next-80B-A3B-Thinking --port 30000 --tp-size 4 --context-length 262144 --reasoning-parser deepseek-r1 --mem-fraction-static 0.8 --speculative-algo NEXTN --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4\n```\n\n> [!Note]\n> The default context length is 256K. \n> If you encounter out-of-memory (OOM) issues, you may consider reducing the context length to a smaller value. \n> However, since the model may require longer token sequences for reasoning, we strongly recommend using a context length greater than 131,072.\n\nPlease also refer to SGLang's usage guide on [Qwen3-Next](https://docs.sglang.ai/basic_usage/qwen3.html).\n\n### vLLM\n\n[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs.\nvLLM could be used to launch a server with OpenAI-compatible API service. \n\n`vllm>=0.10.2` is required for Qwen3-Next, which can be installed using:\n```shell\npip install 'vllm>=0.10.2'\n```\nSee [its documentation](https://docs.vllm.ai/en/stable/getting_started/installation/index.html) for more details.\n\nThe following command can be used to create an API endpoint at `http://localhost:8000/v1` with maximum context length 256K tokens using tensor parallel on 4 GPUs.\n```shell\nvllm serve Qwen/Qwen3-Next-80B-A3B-Thinking --port 8000 --tensor-parallel-size 4 --max-model-len 262144 --reasoning-parser deepseek_r1\n```\n\nThe following command is recommended for MTP with the rest settings the same as above:\n```shell\nvllm serve Qwen/Qwen3-Next-80B-A3B-Thinking --port 8000 --tensor-parallel-size 4 --max-model-len 262144 --reasoning-parser deepseek_r1 --speculative-config '{""method"":""qwen3_next_mtp"",""num_speculative_tokens"":2}'\n```\n\n> [!Note]\n> The default context length is 256K. \n> If you encounter out-of-memory (OOM) issues, you may consider reducing the context length to a smaller value. \n> However, since the model may require longer token sequences for reasoning, we strongly recommend using a context length greater than 131,072 when possible.\n\nPlease also refer to vLLM's usage guide on [Qwen3-Next](https://docs.vllm.ai/projects/recipes/en/latest/Qwen/Qwen3-Next.html).\n\n\n## Agentic Use\n\nQwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.\n\nTo define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.\n```python\nfrom qwen_agent.agents import Assistant\n\n# Define LLM\n# Using Alibaba Cloud Model Studio\nllm_cfg = {\n 'model': 'Qwen3-Next-80B-A3B-Thinking',\n 'model_type': 'qwen_dashscope',\n}\n\n# Using OpenAI-compatible API endpoint. It is recommended to disable the reasoning and the tool call parsing\n# functionality of the deployment frameworks and let Qwen-Agent automate the related operations. For example, \n# `vllm serve Qwen/Qwen3-Next-80B-A3B-Thinking --served-model-name Qwen3-Next-80B-A3B-Thinking --port 8000 --tensor-parallel-size 4 --max-model-len 262144`.\n#\n# llm_cfg = {\n# 'model': 'Qwen3-Next-80B-A3B-Thinking',\n# \n# # Use a custom endpoint compatible with OpenAI API:\n# 'model_server': 'http://localhost:8000/v1', # api_base without reasoning and tool call parsing\n# 'api_key': 'EMPTY',\n# 'generate_cfg': {\n# 'thought_in_content': True,\n# },\n# }\n\n# Define Tools\ntools = [\n {'mcpServers': { # You can specify the MCP configuration file\n 'time': {\n 'command': 'uvx',\n 'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']\n },\n ""fetch"": {\n ""command"": ""uvx"",\n ""args"": [""mcp-server-fetch""]\n }\n }\n },\n 'code_interpreter', # Built-in tools\n]\n\n# Define Agent\nbot = Assistant(llm=llm_cfg, function_list=tools)\n\n# Streaming generation\nmessages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]\nfor responses in bot.run(messages=messages):\n pass\nprint(responses)\n```\n\n\n## Processing Ultra-Long Texts\n\nQwen3-Next natively supports context lengths of up to 262,144 tokens. \nFor conversations where the total length (including both input and output) significantly exceeds this limit, we recommend using RoPE scaling techniques to handle long texts effectively. \nWe have validated the model's performance on context lengths of up to 1 million tokens using the [YaRN](https://arxiv.org/abs/2309.00071) method.\n\nYaRN is currently supported by several inference frameworks, e.g., `transformers`, `vllm` and `sglang`. \nIn general, there are two approaches to enabling YaRN for supported frameworks:\n\n- Modifying the model files:\n In the `config.json` file, add the `rope_scaling` fields:\n ```json\n {\n ...,\n ""rope_scaling"": {\n ""rope_type"": ""yarn"",\n ""factor"": 4.0,\n ""original_max_position_embeddings"": 262144\n }\n }\n ```\n\n- Passing command line arguments:\n\n For `vllm`, you can use\n ```shell\n VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve ... --rope-scaling '{""rope_type"":""yarn"",""factor"":4.0,""original_max_position_embeddings"":262144}' --max-model-len 1010000 \n ```\n\n For `sglang`, you can use\n ```shell\n SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python -m sglang.launch_server ... --json-model-override-args '{""rope_scaling"":{""rope_type"":""yarn"",""factor"":4.0,""original_max_position_embeddings"":262144}}' --context-length 1010000\n ```\n\n> [!NOTE]\n> All the notable open-source frameworks implement static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.**\n> We advise adding the `rope_scaling` configuration only when processing long contexts is required. \n> It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 524,288 tokens, it would be better to set `factor` as 2.0. \n\n## Best Practices\n\nTo achieve optimal performance, we recommend the following settings:\n\n1. **Sampling Parameters**:\n - We suggest using `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`.\n - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.\n\n2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 81,920 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.\n\n3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.\n - **Math Problems**: Include ""Please reason step by step, and put your final answer within \boxed{}."" in the prompt.\n - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: ""Please show your choice in the `answer` field with only the choice letter, e.g., `""answer"": ""C""`.""\n\n4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.\n\n### Citation\n\nIf you find our work helpful, feel free to give us a cite.\n\n```\n@misc{qwen3technicalreport,\n title={Qwen3 Technical Report}, \n author={Qwen Team},\n year={2025},\n eprint={2505.09388},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2505.09388}, \n}\n\n@article{qwen2.5-1m,\n title={Qwen2.5-1M Technical Report}, \n author={An Yang and Bowen Yu and Chengyuan Li and Dayiheng Liu and Fei Huang and Haoyan Huang and Jiandong Jiang and Jianhong Tu and Jianwei Zhang and Jingren Zhou and Junyang Lin and Kai Dang and Kexin Yang and Le Yu and Mei Li and Minmin Sun and Qin Zhu and Rui Men and Tao He and Weijia Xu and Wenbiao Yin and Wenyuan Yu and Xiafei Qiu and Xingzhang Ren and Xinlong Yang and Yong Li and Zhiying Xu and Zipeng Zhang},\n journal={arXiv preprint arXiv:2501.15383},\n year={2025}\n}\n```",2025-09-09T15:45:31+00:00,model,[0],"model_finetune_model:kikekewl/Qwen3-Next-80B-A3B-Thinking-mlx-bf16, model_quantized_model:Intel/Qwen3-Next-80B-A3B-Thinking-int4-mixed-AutoRound, cpatonn/Qwen3-Next-80B-A3B-Thinking-AWQ-4bit, llllwxxx/Qwen3-Next-80B-A3B-Thinking-FP8-Dynamic, mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
4,google/embeddinggemma-300m,"---\nlicense: gemma\npipeline_tag: sentence-similarity\nlibrary_name: sentence-transformers\ntags:\n- sentence-transformers\n- sentence-similarity\n- feature-extraction\n- text-embeddings-inference\nextra_gated_heading: Access EmbeddingGemma on Hugging Face\nextra_gated_prompt: To access EmbeddingGemma on Hugging Face, you’re required to review and\n agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging\n Face and click below. Requests are processed immediately.\nextra_gated_button_content: Acknowledge license\n---\n\n# EmbeddingGemma model card\n\n**Model Page**: [EmbeddingGemma](https://ai.google.dev/gemma/docs/embeddinggemma)\n\n**Resources and Technical Documentation**:\n\n* [Responsible Generative AI Toolkit](https://ai.google.dev/responsible)\n* [EmbeddingGemma on Kaggle](https://www.kaggle.com/models/google/embeddinggemma/)\n* [EmbeddingGemma on Vertex Model Garden](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/embeddinggemma)\n\n**Terms of Use**: [Terms](https://ai.google.dev/gemma/terms)\n\n**Authors**: Google DeepMind\n\n## Model Information\n\n### Description\n\nEmbeddingGemma is a 300M parameter, state-of-the-art for its size, open embedding model from Google, built from Gemma 3 (with T5Gemma initialization) and the same research and technology used to create Gemini models. EmbeddingGemma produces vector representations of text, making it well-suited for search and retrieval tasks, including classification, clustering, and semantic similarity search. This model was trained with data in 100+ spoken languages.\n\nThe small size and on-device focus makes it possible to deploy in environments with limited resources such as mobile phones, laptops, or desktops, democratizing access to state of the art AI models and helping foster innovation for everyone.\n\n### Inputs and outputs\n\n- **Input:**\n - Text string, such as a question, a prompt, or a document to be embedded\n - Maximum input context length of 2048 tokens\n\n- **Output:**\n - Numerical vector representations of input text data\n - Output embedding dimension size of 768, with smaller options available (512, 256, or 128) via Matryoshka Representation Learning (MRL). MRL allows users to truncate the output embedding of size 768 to their desired size and then re-normalize for efficient and accurate representation.\n\n### Usage\n\nThese model weights are designed to be used with [Sentence Transformers](https://www.SBERT.net), using the [Gemma 3](https://huggingface.co/docs/transformers/main/en/model_doc/gemma3) implementation from [Hugging Face Transformers](https://huggingface.co/docs/transformers/en/index) as the backbone.\n\nFirst install the Sentence Transformers library:\n\n```bash\npip install -U sentence-transformers\n```\n\nThen you can load this model and run inference.\n\n```python\nfrom sentence_transformers import SentenceTransformer\n\n# Download from the 🤗 Hub\nmodel = SentenceTransformer(""google/embeddinggemma-300m"")\n\n# Run inference with queries and documents\nquery = ""Which planet is known as the Red Planet?""\ndocuments = [\n ""Venus is often called Earth's twin because of its similar size and proximity."",\n ""Mars, known for its reddish appearance, is often referred to as the Red Planet."",\n ""Jupiter, the largest planet in our solar system, has a prominent red spot."",\n ""Saturn, famous for its rings, is sometimes mistaken for the Red Planet.""\n]\nquery_embeddings = model.encode_query(query)\ndocument_embeddings = model.encode_document(documents)\nprint(query_embeddings.shape, document_embeddings.shape)\n# (768,) (4, 768)\n\n# Compute similarities to determine a ranking\nsimilarities = model.similarity(query_embeddings, document_embeddings)\nprint(similarities)\n# tensor([[0.3011, 0.6359, 0.4930, 0.4889]])\n```\n\n**NOTE**: EmbeddingGemma activations do not support `float16`. Please use `float32` or `bfloat16` as appropriate for your hardware.\n\n## Model Data\n\n### Training Dataset\n\nThis model was trained on a dataset of text data that includes a wide variety of sources totaling approximately 320 billion tokens. Here are the key components:\n\n- **Web Documents**: A diverse collection of web text ensures the model is exposed to a broad range of linguistic styles, topics, and vocabulary. The training dataset includes content in over 100 languages.\n- **Code and Technical Documents**: Exposing the model to code and technical documentation helps it learn the structure and patterns of programming languages and specialized scientific content, which improves its understanding of code and technical questions.\n- **Synthetic and Task-Specific Data**: Synthetically training data helps to teach the model specific skills. This includes curated data for tasks like information retrieval, classification, and sentiment analysis, which helps to fine-tune its performance for common embedding applications.\n\nThe combination of these diverse data sources is crucial for training a powerful multilingual embedding model that can handle a wide variety of different tasks and data formats.\n\n### Data Preprocessing\n\nHere are the key data cleaning and filtering methods applied to the training data:\n\n- CSAM Filtering: Rigorous CSAM (Child Sexual Abuse Material) filtering was applied at multiple stages in the data preparation process to ensure the exclusion of harmful and illegal content.\n- Sensitive Data Filtering: As part of making Gemma pre-trained models safe and reliable, automated techniques were used to filter out certain personal information and other sensitive data from training sets.\n- Additional methods: Filtering based on content quality and safety in line with [our policies](https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf).\n\n## Model Development\n\n### Hardware\n\nEmbeddingGemma was trained using the latest generation of [Tensor Processing Unit (TPU)](https://cloud.google.com/tpu/docs/intro-to-tpu) hardware (TPUv5e), for more details refer to the [Gemma 3 model card](https://ai.google.dev/gemma/docs/core/model_card_3).\n\n### Software\n\nTraining was done using [JAX](https://github.com/jax-ml/jax) and [ML Pathways](https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/). For more details refer to the [Gemma 3 model card](https://ai.google.dev/gemma/docs/core/model_card_3).\n\n## Evaluation\n\n### Benchmark Results\n\nThe model was evaluated against a large collection of different datasets and metrics to cover different aspects of text understanding.\n\n#### Full Precision Checkpoint\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (Multilingual, v2)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Dimensionality</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>768d</td>\n <td>61.15</td>\n <td>54.31</td>\n </tr>\n <tr>\n <td>512d</td>\n <td>60.71</td>\n <td>53.89</td>\n </tr>\n <tr>\n <td>256d</td>\n <td>59.68</td>\n <td>53.01</td>\n </tr>\n <tr>\n <td>128d</td>\n <td>58.23</td>\n <td>51.77</td>\n </tr>\n </tbody>\n</table>\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (English, v2)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Dimensionality</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>768d</td>\n <td>68.36</td>\n <td>64.15</td>\n </tr>\n <tr>\n <td>512d</td>\n <td>67.80</td>\n <td>63.59</td>\n </tr>\n <tr>\n <td>256d</td>\n <td>66.89</td>\n <td>62.94</td>\n </tr>\n <tr>\n <td>128d</td>\n <td>65.09</td>\n <td>61.56</td>\n </tr>\n </tbody>\n</table>\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (Code, v1)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Dimensionality</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>768d</td>\n <td>68.76</td>\n <td>68.76</td>\n </tr>\n <tr>\n <td>512d</td>\n <td>68.48</td>\n <td>68.48</td>\n </tr>\n <tr>\n <td>256d</td>\n <td>66.74</td>\n <td>66.74</td>\n </tr>\n <tr>\n <td>128d</td>\n <td>62.96</td>\n <td>62.96</td>\n </tr>\n </tbody>\n</table>\n\n#### QAT Checkpoints\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (Multilingual, v2)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Quant config (dimensionality)</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>Q4_0 (768d)</td>\n <td>60.62</td>\n <td>53.61</td>\n </tr>\n <tr>\n <td>Q8_0 (768d)</td>\n <td>60.93</td>\n <td>53.95</td>\n </tr>\n <tr>\n <td>Mixed Precision* (768d)</td>\n <td>60.69</td>\n <td>53.82</td>\n </tr>\n </tbody>\n</table>\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (English, v2)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Quant config (dimensionality)</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>Q4_0 (768d)</td>\n <td>67.91</td>\n <td>63.64</td>\n </tr>\n <tr>\n <td>Q8_0 (768d)</td>\n <td>68.13</td>\n <td>63.85</td>\n </tr>\n <tr>\n <td>Mixed Precision* (768d)</td>\n <td>67.95</td>\n <td>63.83</td>\n </tr>\n </tbody>\n</table>\n\n<table>\n <thead>\n <tr>\n <th colspan=""3""><strong>MTEB (Code, v1)</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>Quant config (dimensionality)</strong></td>\n <td><strong>Mean (Task)</strong></td>\n <td><strong>Mean (TaskType)</strong></td>\n </tr>\n <tr>\n <td>Q4_0 (768d)</td>\n <td>67.99</td>\n <td>67.99</td>\n </tr>\n <tr>\n <td>Q8_0 (768d)</td>\n <td>68.70</td>\n <td>68.70</td>\n </tr>\n <tr>\n <td>Mixed Precision* (768d)</td>\n <td>68.03</td>\n <td>68.03</td>\n </tr>\n </tbody>\n</table>\n\nNote: QAT models are evaluated after quantization\n\n\* Mixed Precision refers to per-channel quantization with int4 for embeddings, feedforward, and projection layers, and int8 for attention (e4_a8_f4_p4).\n\n### Prompt Instructions\n\nEmbeddingGemma can generate optimized embeddings for various use cases—such as document retrieval, question answering, and fact verification—or for specific input types—either a query or a document—using prompts that are prepended to the input strings.\nQuery prompts follow the form `task: {task description} | query: ` where the task description varies by the use case, with the default task description being `search result`. Document-style prompts follow the form `title: {title | ""none""} | text: ` where the title is either `none` (the default) or the actual title of the document. Note that providing a title, if available, will improve model performance for document prompts but may require manual formatting.\n\nUse the following prompts based on your use case and input data type. These may already be available in the EmbeddingGemma configuration in your modeling framework of choice.\n\n<table>\n <thead>\n <tr>\n <th><br>\n<strong>Use Case (task type enum)</strong></th>\n <th><br>\n<strong>Descriptions</strong></th>\n <th><br>\n<strong>Recommended Prompt</strong></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><br>\nRetrieval (Query)</td>\n <td rowspan=""4""><br>\nUsed to generate embeddings that are optimized for document search or information retrieval</td>\n <td><br>\ntask: search result | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nRetrieval (Document)</td>\n <td><br>\ntitle: {title | ""none""} | text: {content}</td>\n </tr>\n <tr>\n <td><br>\nQuestion Answering</td>\n <td><br>\ntask: question answering | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nFact Verification</td>\n <td><br>\ntask: fact checking | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nClassification</td>\n <td><br>\nUsed to generate embeddings that are optimized to classify texts according to preset labels</td>\n <td><br>\ntask: classification | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nClustering</td>\n <td><br>\nUsed to generate embeddings that are optimized to cluster texts based on their similarities</td>\n <td><br>\ntask: clustering | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nSemantic Similarity</td>\n <td><br>\nUsed to generate embeddings that are optimized to assess text similarity. This is not intended for retrieval use cases.</td>\n <td><br>\ntask: sentence similarity | query: {content}</td>\n </tr>\n <tr>\n <td><br>\nCode Retrieval</td>\n <td><br>\nUsed to retrieve a code block based on a natural language query, such as <em>sort an array</em> or <em>reverse a linked list</em>. Embeddings of the code blocks are computed using retrieval_document.</td>\n <td><br>\ntask: code retrieval | query: {content}</td>\n </tr>\n </tbody>\n</table>\n\n## Usage and Limitations\n\nThese models have certain limitations that users should be aware of.\n\n### Intended Usage\n\nOpen embedding models have a wide range of applications across various industries and domains. The following list of potential uses is not comprehensive. The purpose of this list is to provide contextual information about the possible use-cases that the model creators considered as part of model training and development.\n\n- **Semantic Similarity**: Embeddings optimized to assess text similarity, such as recommendation systems and duplicate detection\n- **Classification**: Embeddings optimized to classify texts according to preset labels, such as sentiment analysis and spam detection\n- **Clustering**: Embeddings optimized to cluster texts based on their similarities, such as document organization, market research, and anomaly detection\n- **Retrieval**\n - **Document**: Embeddings optimized for document search, such as indexing articles, books, or web pages for search\n - **Query**: Embeddings optimized for general search queries, such as custom search\n - **Code Query**: Embeddings optimized for retrieval of code blocks based on natural language queries, such as code suggestions and search\n\n- **Question Answering**: Embeddings for questions in a question-answering system, optimized for finding documents that answer the question, such as chatbox.\n- **Fact Verification**: Embeddings for statements that need to be verified, optimized for retrieving documents that contain evidence supporting or refuting the statement, such as automated fact-checking systems.\n\n### Limitations\n\n- Training Data\n - The quality and diversity of the training data significantly influence the model's capabilities. Biases or gaps in the training data can lead to limitations in the model's responses.\n - The scope of the training dataset determines the subject areas the model can handle effectively.\n\n- Language Ambiguity and Nuance\n - Natural language is inherently complex. Models might struggle to grasp subtle nuances, sarcasm, or figurative language.\n\n### Ethical Considerations and Risks\n\nRisks identified and mitigations:\n\n- **Perpetuation of biases**: It's encouraged to perform continuous monitoring (using evaluation metrics, human review) and the exploration of de-biasing techniques during model training, fine-tuning, and other use cases.\n- **Misuse for malicious purposes**: Technical limitations and developer and end-user education can help mitigate against malicious applications of embeddings. Educational resources and reporting mechanisms for users to flag misuse are provided. Prohibited uses of Gemma models are outlined in the [Gemma Prohibited Use Policy](https://ai.google.dev/gemma/prohibited_use_policy).\n- **Privacy violations**: Models were trained on data filtered for removal of certain personal information and other sensitive data. Developers are encouraged to adhere to privacy regulations with privacy-preserving techniques.\n\n### Benefits\n\nAt the time of release, this family of models provides high-performance open embedding model implementations designed from the ground up for responsible AI development compared to similarly sized models. Using the benchmark evaluation metrics described in this document, these models have shown superior performance to other, comparably-sized open model alternatives.",2025-07-17T19:53:55+00:00,model,"[24, 7]","model_finetune_model:sigridjineth/colbert-ko-embeddinggemma-300m, sentence-transformers/embeddinggemma-300m-medical, yasserrmd/geo-gemma-300m-emb, unsloth/embeddinggemma-300m, Omartificial-Intelligence-Space/AraGemma-Embedding-300m, Saidakmal/uz_embeddinggemma-300m, yasserrmd/pharma-gemma-300m-emb, model_quantized_model:onnx-community/embeddinggemma-300m-ONNX, unsloth/embeddinggemma-300m-GGUF, ggml-org/embeddinggemma-300M-GGUF, SandLogicTechnologies/EmbeddingGemma-300m-GGUF","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [9]:
import pandas as pd
import json
import pickle

# --- Load Edges ---
with open("edges_df.pkl", "rb") as f:
    edges_df = pickle.load(f)

# --- Load Mapping Dictionaries ---
with open("old_to_new_idx.json", "r") as f:
    old_to_new_idx = json.load(f)

with open("task_to_idx.json", "r") as f:
    task_to_idx = json.load(f)

print("✅ Files loaded successfully!")
print("edges_df shape:", edges_df.shape)
print("old_to_new_idx size:", len(old_to_new_idx))
print("task_to_idx:", task_to_idx)


✅ Files loaded successfully!
edges_df shape: (303296, 4)
old_to_new_idx size: 107694
task_to_idx: {'text-generation': 0, 'question-answering': 1, 'text-to-video': 2, 'image-to-video': 3, 'image-to-3d': 4, 'robotics': 5, 'translation': 6, 'feature-extraction': 7, 'text-to-3d': 8, 'text-to-speech': 9, 'automatic-speech-recognition': 10, 'image-classification': 11, 'table-question-answering': 12, 'fill-mask': 13, 'multiple-choice': 14, 'visual-question-answering': 15, 'summarization': 16, 'image-to-text': 17, 'image-feature-extraction': 18, 'text-to-image': 19, 'text-to-audio': 20, 'reinforcement-learning': 21, 'image-text-to-text': 22, 'text-classification': 23, 'sentence-similarity': 24, 'zero-shot-classification': 25, 'text-retrieval': 26, 'token-classification': 27, 'object-detection': 28, 'audio-classification': 29, 'image-segmentation': 30, 'time-series-forecasting': 31, 'video-classification': 32, 'zero-shot-image-classification': 33, 'any-to-any': 34, 'image-to-image': 35, 'depth-

In [11]:
edges_df.head(10)

Unnamed: 0_level_0,source_node,dest_node,edge_type,edge_attr
match,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,tencent/SRPO,rockerBOO/flux.1-dev-SRPO,model_finetune_model,0
1,tencent/SRPO,wikeeyang/SRPO-Refine-Quantized-v1.0,model_quantized_model,3
1,tencent/SRPO,befox/SRPO-GGUF,model_quantized_model,3
1,tencent/SRPO,wikeeyang/SRPO-for-ComfyUI,model_quantized_model,3
0,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking,model_finetune_model,0
1,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3
1,baidu/ERNIE-4.5-21B-A3B-Thinking,gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3
1,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit,model_quantized_model,3
1,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit,model_quantized_model,3
1,baidu/ERNIE-4.5-21B-A3B-Thinking,mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3


In [12]:
edges_df.columns

Index(['source_node', 'dest_node', 'edge_type', 'edge_attr'], dtype='object')

In [10]:
# remove spaces in edge_type
edges_df["edge_type"] = (
    edges_df["edge_type"]
    .astype(str)
    .str.strip()                   # remove leading/trailing spaces
    .str.replace(r"\s+", "_", regex=True)  # collapse internal spaces to underscores if any
)

print("✅ Cleaned edge_type values:")
print(edges_df["edge_type"].unique())


✅ Cleaned edge_type values:
['model_finetune_model' 'model_quantized_model' 'model_adapter_model'
 'model_trainedOrFineTunedOn_dataset' 'model_merge_model']


In [11]:
# dedup edges

SRC_COL = "source_node"            # or "source_node"
DST_COL = "dest_node"            # or "dest_node"
EDGE_TYPE_COL = "edge_type"
EDGE_ATTR_COL = "edge_attr"

# Canonicalize undirected pair so (u,v) == (v,u)
edges_df["canon_u"] = edges_df[[SRC_COL, DST_COL]].min(axis=1)
edges_df["canon_v"] = edges_df[[SRC_COL, DST_COL]].max(axis=1)

# Drop duplicates in-place using the canonical keys + attributes
edges_df.drop_duplicates(
    subset=["canon_u", "canon_v", EDGE_TYPE_COL],
    keep="first",
    inplace=True
)

# Drop helper columns now that dedup is done
edges_df.drop(columns=["canon_u", "canon_v"], inplace=True)

# Reset index to keep things tidy
edges_df.reset_index(drop=True, inplace=True)

print(f"✅ Deduplicated edges_df now has {len(edges_df)} rows (in place).")


✅ Deduplicated edges_df now has 299702 rows (in place).


In [12]:
# Split edge_type into source_type and dest_type
edges_df["source_type"] = edges_df["edge_type"].str.split("_").str[0]
edges_df["dest_type"] = edges_df["edge_type"].str.split("_").str[-1]

print("✅ Added columns 'source_type' and 'dest_type'")
display(edges_df.head(10))


✅ Added columns 'source_type' and 'dest_type'


Unnamed: 0,source_node,dest_node,edge_type,edge_attr,source_type,dest_type
0,tencent/SRPO,rockerBOO/flux.1-dev-SRPO,model_finetune_model,0,model,model
1,tencent/SRPO,wikeeyang/SRPO-Refine-Quantized-v1.0,model_quantized_model,3,model,model
2,tencent/SRPO,befox/SRPO-GGUF,model_quantized_model,3,model,model
3,tencent/SRPO,wikeeyang/SRPO-for-ComfyUI,model_quantized_model,3,model,model
4,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking,model_finetune_model,0,model,model
5,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model
6,baidu/ERNIE-4.5-21B-A3B-Thinking,gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model
7,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit,model_quantized_model,3,model,model
8,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit,model_quantized_model,3,model,model
9,baidu/ERNIE-4.5-21B-A3B-Thinking,mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model


In [13]:
list(old_to_new_idx.items())[:30]

[('0', 0),
 ('1', 1),
 ('2', 2),
 ('3', 3),
 ('4', 4),
 ('5', 5),
 ('6', 6),
 ('7', 7),
 ('8', 8),
 ('10', 9),
 ('12', 10),
 ('13', 11),
 ('14', 12),
 ('15', 13),
 ('16', 14),
 ('18', 15),
 ('19', 16),
 ('20', 17),
 ('22', 18),
 ('23', 19),
 ('24', 20),
 ('25', 21),
 ('26', 22),
 ('27', 23),
 ('28', 24),
 ('30', 25),
 ('31', 26),
 ('33', 27),
 ('34', 28),
 ('35', 29)]

In [17]:
nodes_df.columns

Index(['id', 'description', 'createdAt', 'type', 'y_multi_lab',
       'relationships', 'y'],
      dtype='object')

In [13]:
# import os
# import torch
# from torch_geometric.llm.models import SentenceTransformer
# from huggingface_hub import login

# # ---- setup ----
# os.environ["HF_HOME"] = "/home/hice1/cxu371/scratch/hf_cache"
# os.environ["TRANSFORMERS_CACHE"] = "/home/hice1/cxu371/scratch/hf_cache"

# # authenticate to HF
# login(token="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")  # or directly pass the string token

# # ---- load model ----
# encoder = SentenceTransformer("BAAI/bge-base-en-v1.5").to("cuda")
# print("Loaded encoder:", encoder)

# loading from my snapshot
import os
from torch_geometric.llm.models import SentenceTransformer

# Optional: make sure caches point to scratch
os.environ["HF_HOME"] = "/home/hice1/cxu371/scratch/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/home/hice1/cxu371/scratch/huggingface_cache"

# Directly point to the snapshot directory
local_model_path = "/home/hice1/cxu371/scratch/huggingface_cache/hub/models--BAAI--bge-base-en-v1.5/snapshots/a5beb1e3e68b9ab74eb54cfd186867f64f240e1a"

encoder = SentenceTransformer(local_model_path).to("cuda")
print("✅ Loaded encoder:", encoder)


✅ Loaded encoder: SentenceTransformer(model_name=/home/hice1/cxu371/scratch/huggingface_cache/hub/models--BAAI--bge-base-en-v1.5/snapshots/a5beb1e3e68b9ab74eb54cfd186867f64f240e1a)


In [14]:
encoder.eval()

SentenceTransformer(model_name=/home/hice1/cxu371/scratch/huggingface_cache/hub/models--BAAI--bge-base-en-v1.5/snapshots/a5beb1e3e68b9ab74eb54cfd186867f64f240e1a)

In [17]:
edges_df.head(2)

Unnamed: 0,source_node,dest_node,edge_type,edge_attr,source_type,dest_type
0,tencent/SRPO,rockerBOO/flux.1-dev-SRPO,model_finetune_model,0,model,model
1,tencent/SRPO,wikeeyang/SRPO-Refine-Quantized-v1.0,model_quantized_model,3,model,model


In [78]:
nodes_df.columns

Index(['id', 'description', 'createdAt', 'type', 'y_multi_lab',
       'relationships', 'y'],
      dtype='object')

In [71]:
nodes_df.head(2)

Unnamed: 0,id,description,createdAt,type,y_multi_lab,relationships,y
0,tencent/SRPO,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",2025-09-08T12:44:15+00:00,model,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,baidu/ERNIE-4.5-21B-A3B-Thinking,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,model,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [106]:
edges_df.shape

(299702, 6)

In [15]:
import pandas as pd

# --- Step 1: create indexed copies of nodes_df for joining ---
nodes_src = nodes_df.set_index(["id", "type"]).add_prefix("source_")
nodes_dst = nodes_df.set_index(["id", "type"]).add_prefix("dest_")

# --- Step 2: join on (source_node, source_type) ---
a = edges_df.join(
    nodes_src,
    on=["source_node", "source_type"],
    how="left"
)

# --- Step 3: join on (dest_node, dest_type) ---
edges_full_left_join = a.join(
    nodes_dst,
    on=["dest_node", "dest_type"],
    how="left",
    lsuffix="_src", rsuffix="_dst"
)

# --- Step 4: select & rename columns cleanly ---
edges_full_left_join = edges_full_left_join[
    [
        "source_node", "dest_node", "edge_type", "edge_attr", "source_type", "dest_type",
        "source_description", "dest_description",
        "source_createdAt", "source_y_multi_lab", "source_relationships", "source_y",
        "dest_createdAt", "dest_y_multi_lab", "dest_relationships", "dest_y",
    ]
]

print(f"✅ Combined dataframe shape: {edges_full_left_join.shape}")
display(edges_full_left_join.head(10))



✅ Combined dataframe shape: (299702, 16)


Unnamed: 0,source_node,dest_node,edge_type,edge_attr,source_type,dest_type,source_description,dest_description,source_createdAt,source_y_multi_lab,source_relationships,source_y,dest_createdAt,dest_y_multi_lab,dest_relationships,dest_y
0,tencent/SRPO,rockerBOO/flux.1-dev-SRPO,model_finetune_model,0,model,model,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```","---\nbase_model:\n- tencent/SRPO\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n## bf16 and (remaking FP8 version) versions of SRPO from Tencent\n\n<div align=""center"" style=""font-family: charter;"">\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n</div>\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Quick Started\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n\n#### Inference\nReplace the `diffusion_pytorch_model.safetensors` of FLUX\n```python\nfrom diffusers import FluxPipeline\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=infer_step,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-10T21:11:28+00:00,[19],model_finetune_model:Alissonerdx/flux.1-dev-SRPO-LoRas,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,tencent/SRPO,wikeeyang/SRPO-Refine-Quantized-v1.0,model_quantized_model,3,model,model,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```","---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\nlanguage:\n- en\nbase_model:\n- tencent/SRPO\n---\n===================================================================================\n\n本模型为 https://huggingface.co/tencent/SRPO 模型的 精调 和 8bit/4bit (fp8_e4m3fn/Q8_0/Q4_1) 量化版本，主要提升出图的清晰度和模型的兼容性(第一张图片中的 SRPO-fp8 量化生成的图片，显得特别模糊，主要是由于采用 ComfyUI 模型加载并直接量化的方式造成，并非模型 fp8 精度下的实际表现，实际表现请参阅第二张对比图，为避免使用者误解，特提供第二张对比图，模型在不同精度下的表现是正常的)。\n\nThis model is the refine and quantized version of the model: https://huggingface.co/tencent/SRPO, it improve the clarity of the generated images and the compatibility of the models.\n(In below image, the SRPO-fp8 means load and quantized directly by ComfyUI diffusion model loader nodes)\n<p align=""center"">\n <img src=""Compare.jpg"" width=""1200""/>\n<p>\n\n<u>Compare SRPO offical and R&Q v1.0 in the same quantized accuracy:</u>\n\n<p align=""center"">\n <img src=""Compare-02.jpg"" width=""1200""/>\n<p>\n\n## Example workflow: Please refer to workflow.png\n\n## License Agreement\n\nPlease fall under SRPO license refer license.txt file and refer to the FLUX.1 [dev] Non-Commercial License. \n\nAlso: https://civitai.com/models/1953067\n\n以下部分引用自原模型说明内容：\n\n===================================================================================\n\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-13T05:29:39+00:00,[19],,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,tencent/SRPO,befox/SRPO-GGUF,model_quantized_model,3,model,model,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",,2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",,,,
3,tencent/SRPO,wikeeyang/SRPO-for-ComfyUI,model_quantized_model,3,model,model,"---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\n---\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n\n## Acknowledgement\n\nWe sincerely appreciate contributions from the research community to this project. Below are quantized versions developed by fellow researchers.\n\n1. 8bit(fp8_e4m3fn/Q8_0) version by wikeeyang: https://huggingface.co/wikeeyang/SRPO-Refine-Quantized-v1.0\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/6645835a2b57c619a19cc0c4/BATJ0bW_0QPhkN5WY0Q1H.png)\n\n2. bf16 version by rockerBOO: https://huggingface.co/rockerBOO/flux.1-dev-SRPO\n3. GGUF version by befox: https://huggingface.co/befox/SRPO-GGUF\n\n⚠️ Note: When loading weights in ComfyUI, avoid direct conversion of FP32 weights to FP8 format, as this may result in incomplete denoising. For official weights in this repository, FP32/BF16 loading is recommended.\n\n\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n## 🔑 Inference\n\n### Using ComfyUI\n\nYou can use it in [ComfyUI](https://github.com/comfyanonymous/ComfyUI).\n\nLoad the following image in ComfyUI to get the workflow, or load the JSON file directly [SRPO-workflow](comfyui/SRPO-workflow.json):\n\nTip: The workflow JSON info was added to the image file.\n\n![Example](comfyui/SRPO-workflow.png)\n\n### Quick start\n```bash\nfrom diffusers import FluxPipeline\nfrom safetensors.torch import load_file\n\nprompt='The Death of Ophelia by John Everett Millais, Pre-Raphaelite painting, Ophelia floating in a river surrounded by flowers, detailed natural elements, melancholic and tragic atmosphere'\npipe = FluxPipeline.from_pretrained('./data/flux',\n torch_dtype=torch.bfloat16,\n use_safetensors=True\n ).to(""cuda"")\nstate_dict = load_file(""./srpo/diffusion_pytorch_model.safetensors"")\npipe.transformer.load_state_dict(state_dict)\nimage = pipe(\n prompt,\n guidance_scale=3.5,\n height=1024,\n width=1024,\n num_inference_steps=50,\n max_sequence_length=512,\n generator=generator\n).images[0]\n```\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```","---\nlibrary_name: diffusers\nlicense: other\nlicense_name: tencent-hunyuan-community\nlicense_link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt\npipeline_tag: text-to-image\nlanguage:\n- en\nbase_model:\n- tencent/SRPO\n---\n===================================================================================\n\n本模型为 https://huggingface.co/tencent/SRPO 模型的 转换 和 8bit/4bit (fp8_e4m3fn/Q8_0/Q4_1) 量化版本，以适配 ComfyUI 用户环境正常加载和出图，保持原模型正常的出图效果。\n\nThis model is the converted and quantized version of the model: https://huggingface.co/tencent/SRPO, To adapt the ComfyUI environment for normal loading and output of images, maintaining the original model's normal effects.\n\n<u> For bf16 version, Pls download it from: https://www.modelscope.cn/models/wikeeyang/SRPO-for-ComfyUI </u>\n\n<p align=""center"">\n <img src=""example.jpg"" width=""1200""/>\n<p>\n\n## License Agreement\n\nPlease fall under SRPO license refer license.txt file and refer to the FLUX.1 [dev] Non-Commercial License. \n\n\n以下部分引用自原模型说明内容：\n\n===================================================================================\n\n\n<div align=“center” style=“font-family: charter;”>\n<h1 align=""center"">Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference </h1>\n<div align=""center"">\n <a href='https://arxiv.org/abs/2509.06942'><img src='https://img.shields.io/badge/ArXiv-red?logo=arxiv'></a> &nbsp;\n <a href='https://github.com/Tencent-Hunyuan/SRPO'><img src='https://img.shields.io/badge/_Code-SRPO-181717?color=121717&logo=github&logoColor=whitee'></a> &nbsp; \n <a href='https://tencent.github.io/srpo-project-page/'><img src='https://img.shields.io/badge/%F0%9F%92%BB_Project-SRPO-blue'></a> &nbsp;\n</div>\n<div align=""center"">\n Xiangwei Shen<sup>1,2*</sup>,\n <a href=""https://scholar.google.com/citations?user=Lnr1FQEAAAAJ&hl=zh-CN"" target=""_blank""><b>Zhimin Li</b></a><sup>1*</sup>,\n <a href=""https://scholar.google.com.hk/citations?user=Fz3X5FwAAAAJ"" target=""_blank""><b>Zhantao Yang</b></a><sup>1</sup>, \n <a href=""https://shiyi-zh0408.github.io/"" target=""_blank""><b>Shiyi Zhang</b></a><sup>3</sup>,\n Yingfang Zhang<sup>1</sup>,\n Donghao Li<sup>1</sup>,\n <br>\n <a href=""https://scholar.google.com/citations?user=VXQV5xwAAAAJ&hl=en"" target=""_blank""><b>Chunyu Wang</b></a><sup>1</sup>,\n <a href=""https://openreview.net/profile?id=%7EQinglin_Lu2"" target=""_blank""><b>Qinglin Lu</b></a><sup>1</sup>,\n <a href=""https://andytang15.github.io"" target=""_blank""><b>Yansong Tang</b></a><sup>3,✝</sup>\n</div>\n<div align=""center"">\n <sup>1</sup>Hunyuan, Tencent \n <br>\n <sup>2</sup>School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen \n <br>\n <sup>3</sup>Shenzhen International Graduate School, Tsinghua University \n <br>\n <sup>*</sup>Equal contribution \n <sup>✝</sup>Corresponding author\n</div>\n\n\n\n## Abstract\nRecent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, they exhibit two primary challenges: (1) they rely on multistep denoising with gradient computation for reward scoring, which is computationally expensive, thus restricting optimization to only a few diffusion steps; (2) they often need continuous offline adaptation of reward models in order to achieve desired aesthetic quality, such as photorealism or precise lighting effects. To address the limitation of multistep denoising, we propose Direct-Align, a method that predefines a noise prior to effectively recover original images from any time steps via interpolation, leveraging the equation that diffusion states are interpolations between noise and target images, which effectively avoids over-optimization in late timesteps. Furthermore, we introduce Semantic Relative Preference Optimization (SRPO), in which rewards are formulated as text-conditioned signals. This approach enables online adjustment of rewards in response to positive and negative prompt augmentation, thereby reducing the reliance on offline reward fine-tuning. By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, we improve its human-evaluated realism and aesthetic quality by over 3x.\n### Checkpoints\nThe `diffusion_pytorch_model.safetensors` is online version of SRPO based on [FLUX.1 Dev](https://huggingface.co/black-forest-labs/FLUX.1-dev), trained on HPD dataset with [HPSv2](https://github.com/tgxs002/HPSv2)\n\n### License\nSRPO is licensed under the License Terms of SRPO. See `./License.txt` for more details.\n## Citation\nIf you use SRPO for your research, please cite our paper:\n\n```bibtex\n@misc{shen2025directlyaligningdiffusiontrajectory,\n title={Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference}, \n author={Xiangwei Shen and Zhimin Li and Zhantao Yang and Shiyi Zhang and Yingfang Zhang and Donghao Li and Chunyu Wang and Qinglin Lu and Yansong Tang},\n year={2025},\n eprint={2509.06942},\n archivePrefix={arXiv},\n primaryClass={cs.AI},\n url={https://arxiv.org/abs/2509.06942}, \n}\n```",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-16T04:54:58+00:00,[19],,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
4,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking,model_finetune_model,0,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n","---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\nbase_model: baidu/ERNIE-4.5-21B-A3B-Thinking\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Vision Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-10T10:38:04+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
5,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n","---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\nbase_model: baidu/ERNIE-4.5-21B-A3B-Thinking\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Vision Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-10T11:01:33+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
6,baidu/ERNIE-4.5-21B-A3B-Thinking,gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n","---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\nbase_model:\n- baidu/ERNIE-4.5-21B-A3B-Thinking\n---\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Vision Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-09T01:16:03+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
7,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit,model_quantized_model,3,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n","---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\nbase_model:\n- baidu/ERNIE-4.5-21B-A3B-Thinking\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Vision Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-09T09:40:28+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
8,baidu/ERNIE-4.5-21B-A3B-Thinking,cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit,model_quantized_model,3,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n","---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\nbase_model:\n- baidu/ERNIE-4.5-21B-A3B-Thinking\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Vision Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-09T11:34:57+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
9,baidu/ERNIE-4.5-21B-A3B-Thinking,mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF,model_quantized_model,3,model,model,"---\nlicense: apache-2.0\nlanguage:\n- en\n- zh\npipeline_tag: text-generation\ntags:\n- ERNIE4.5\nlibrary_name: transformers\n---\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""https://ernie.baidu.com/"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Chat"" src=""https://img.shields.io/badge/🤖_Chat-ERNIE_Bot-blue"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://huggingface.co/baidu"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Hugging Face"" src=""https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Baidu-ffc107?color=ffc107&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://github.com/PaddlePaddle/ERNIE"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Github"" src=""https://img.shields.io/badge/GitHub-ERNIE-000?logo=github&color=0000FF"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://ernie.baidu.com/blog/ernie4.5"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Blog"" src=""https://img.shields.io/badge/🖖_Blog-ERNIE4.5-A020A0"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://discord.gg/JPmZXDsEEK"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""Discord"" src=""https://img.shields.io/badge/Discord-ERNIE-5865F2?logo=discord&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n <a href=""https://x.com/PaddlePaddle"" target=""_blank"" style=""margin: 2px;"">\n <img alt=""X"" src=""https://img.shields.io/badge/X-PaddlePaddle-6080F0""?logo=x&logoColor=white"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n<div align=""center"" style=""line-height: 1;"">\n <a href=""#license"" style=""margin: 2px;"">\n <img alt=""License"" src=""https://img.shields.io/badge/License-Apache2.0-A5de54"" style=""display: inline-block; vertical-align: middle;""/>\n </a>\n</div>\n\n# ERNIE-4.5-21B-A3B-Thinking\n\n## Model Highlights\n\nOver the past three months, we have continued to scale the **thinking capability** of ERNIE-4.5-21B-A3B, improving both the **quality and depth** of reasoning, thereby advancing the competitiveness of ERNIE **lightweight models** in complex reasoning tasks. We are pleased to introduce **ERNIE-4.5-21B-A3B-Thinking**, featuring the following key enhancements:\n\n* **Significantly improved performance** on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise.\n* **Efficient tool usage** capabilities.\n* **Enhanced 128K long-context understanding** capabilities.\n\n> [!NOTE]\n> Note: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.\n\n![benchmark](./benchmark.png)\n\n## Model Overview\n\nERNIE-4.5-21B-A3B-Thinking is a text MoE post-trained model, with 21B total parameters and 3B activated parameters for each token. The following are the model configuration details:\n\n|Key|Value|\n|-|-|\n|Modality|Text|\n|Training Stage|Posttraining|\n|Params(Total / Activated)|21B / 3B|\n|Layers|28|\n|Heads(Q/KV)|20 / 4|\n|Text Experts(Total / Activated)|64 / 6|\n|Shared Experts|2|\n|Context Length|131072|\n\n## Quickstart\n\n> [!NOTE]\n> To align with the wider community, this model releases Transformer-style weights. Both PyTorch and PaddlePaddle ecosystem tools, such as vLLM, transformers, and FastDeploy, are expected to be able to load and run this model.\n\n### FastDeploy Inference\n\nQuickly deploy services using FastDeploy as shown below. For more detailed usage, refer to the [FastDeploy GitHub Repository](https://github.com/PaddlePaddle/FastDeploy).\n\n**Note**: 80GB x 1 GPU resources are required. Deploying this model requires FastDeploy version 2.2.\n\n```bash\npython -m fastdeploy.entrypoints.openai.api_server \\n --model baidu/ERNIE-4.5-21B-A3B-Thinking \\n --port 8180 \\n --metrics-port 8181 \\n --engine-worker-queue-port 8182 \\n --load_choices ""default_v1"" \\n --tensor-parallel-size 1 \\n --max-model-len 131072 \\n --reasoning-parser ernie_x1 \\n --tool-call-parser ernie_x1 \\n --max-num-seqs 32\n```\n\nThe ERNIE-4.5-21B-A3B-Thinking model supports function call.\n\n```bash\ncurl -X POST ""http://0.0.0.0:8180/v1/chat/completions"" \\n-H ""Content-Type: application/json"" \\n-d $'{\n ""messages"": [\n {\n ""role"": ""user"",\n ""content"": ""How \'s the weather in Beijing today?""\n }\n ],\n ""tools"": [\n {\n ""type"": ""function"",\n ""function"": {\n ""name"": ""get_weather"",\n ""description"": ""Determine weather in my location"",\n ""parameters"": {\n ""type"": ""object"",\n ""properties"": {\n ""location"": {\n ""type"": ""string"",\n ""description"": ""The city and state e.g. San Francisco, CA""\n },\n ""unit"": {\n ""type"": ""string"",\n ""enum"": [\n ""c"",\n ""f""\n ]\n }\n },\n ""additionalProperties"": false,\n ""required"": [\n ""location"",\n ""unit""\n ]\n },\n ""strict"": true\n }\n }]\n}'\n```\n\n### vLLM inference\n\n```bash\nvllm serve baidu/ERNIE-4.5-21B-A3B-Thinking\n```\n\nThe `reasoning-parser` and `tool-call-parser` for vLLM Ernie are currently under development.\n\n### Using `transformers` library\n\n**Note**: You'll need the`transformers`library (version 4.54.0 or newer) installed to use this model.\n\nThe following contains a code snippet illustrating how to use the model generate content based on given inputs.\n\n```python\nimport torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = ""baidu/ERNIE-4.5-21B-A3B-Thinking""\n\n# load the tokenizer and the model\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n model_name,\n device_map=""auto"",\n torch_dtype=torch.bfloat16,\n)\n\n# prepare the model input\nprompt = ""Give me a short introduction to large language model.""\nmessages = [\n {""role"": ""user"", ""content"": prompt}\n]\ntext = tokenizer.apply_chat_template(\n messages,\n tokenize=False,\n add_generation_prompt=True\n)\nmodel_inputs = tokenizer([text], add_special_tokens=False, return_tensors=""pt"").to(model.device)\n\n# conduct text completion\ngenerated_ids = model.generate(\n **model_inputs,\n max_new_tokens=1024\n)\noutput_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()\n\n# decode the generated ids\ngenerate_text = tokenizer.decode(output_ids, skip_special_tokens=True)\nprint(""generate_text:"", generate_text)\n```\n\n## License\n\nThe ERNIE 4.5 models are provided under the Apache License 2.0. This license permits commercial use, subject to its terms and conditions. Copyright (c) 2025 Baidu, Inc. All Rights Reserved.\n\n## Citation\n\nIf you find ERNIE 4.5 useful or wish to use it in your projects, please kindly cite our technical report:\n\n```text\n@misc{ernie2025technicalreport,\n title={ERNIE 4.5 Technical Report},\n author={Baidu-ERNIE-Team},\n year={2025},\n primaryClass={cs.CL},\n howpublished={\url{https://ernie.baidu.com/blog/publication/ERNIE_Technical_Report.pdf}}\n}\n```\n\n",,2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",,,,


In [16]:
import re
import pandas as pd
from tqdm import tqdm

# ---------- Step 1. Helper: clean and lighten long descriptions ----------
def clean_description(text):
    """Remove HTML/Markdown/code clutter and extract pipeline_tag if available."""
    if not isinstance(text, str) or not text.strip():
        return "", None

    # Extract pipeline_tag if present
    pipeline_tag_match = re.search(r"pipeline_tag:\s*([^\n]+)", text)
    pipeline_tag = pipeline_tag_match.group(1).strip() if pipeline_tag_match else None

    # Remove code blocks, HTML, markdown, and citations
    cleaned = re.sub(r"```.*?```", " ", text, flags=re.DOTALL)           # code blocks
    cleaned = re.sub(r"<[^>]+>", " ", cleaned)                           # HTML tags
    cleaned = re.sub(r"!\[[^\]]*\]\([^)]+\)", " ", cleaned)              # images
    cleaned = re.sub(r"\[[^\]]*\]\([^)]+\)", " ", cleaned)               # links
    cleaned = re.sub(r"#+\s*", " ", cleaned)                             # markdown headers
    cleaned = re.sub(r"---+", " ", cleaned)                              # separators
    cleaned = re.sub(r"[\*_`>]+", " ", cleaned)                          # markdown symbols
    cleaned = re.sub(r"\s+", " ", cleaned).strip()                       # extra spaces

    # Optionally shorten extremely long text
    if len(cleaned) > 800:
        cleaned = cleaned[:800] + " ..."

    return cleaned, pipeline_tag


# ---------- Step 2. Apply cleaning to both source and destination ----------
def preprocess_descriptions(df):
    """Apply description cleaning and extract pipeline tags for both source/dest."""
    tqdm.pandas(desc="🧹 Cleaning descriptions")

    df[["source_description", "source_pipeline_tag"]] = df["source_description"].progress_apply(
        lambda x: pd.Series(clean_description(x))
    )

    df[["dest_description", "dest_pipeline_tag"]] = df["dest_description"].progress_apply(
        lambda x: pd.Series(clean_description(x))
    )

    return df


edges_full_left_join = preprocess_descriptions(edges_full_left_join)


🧹 Cleaning descriptions: 100%|██████████| 299702/299702 [04:26<00:00, 1124.18it/s]
🧹 Cleaning descriptions: 100%|██████████| 299702/299702 [01:44<00:00, 2863.25it/s] 


In [17]:
edges_full_left_join.head()

Unnamed: 0,source_node,dest_node,edge_type,edge_attr,source_type,dest_type,source_description,dest_description,source_createdAt,source_y_multi_lab,source_relationships,source_y,dest_createdAt,dest_y_multi_lab,dest_relationships,dest_y,source_pipeline_tag,dest_pipeline_tag
0,tencent/SRPO,rockerBOO/flux.1-dev-SRPO,model_finetune_model,0,model,model,"library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ...","base model: - tencent/SRPO library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image bf16 and (remaking FP8 version) versions of SRPO from Tencent Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of direct ...",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-10T21:11:28+00:00,[19],model_finetune_model:Alissonerdx/flux.1-dev-SRPO-LoRas,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",text-to-image,text-to-image
1,tencent/SRPO,wikeeyang/SRPO-Refine-Quantized-v1.0,model_quantized_model,3,model,model,"library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ...","library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image language: - en base model: - tencent/SRPO =================================================================================== 本模型为 https://huggingface.co/tencent/SRPO 模型的 精调 和 8bit/4bit (fp8 e4m3fn/Q8 0/Q4 1) 量化版本，主要提升出图的清晰度和模型的兼容性(第一张图片中的 SRPO-fp8 量化生成的图片，显得特别模糊，主要是由于采用 ComfyUI 模型加载并直接量化的方式造成，并非模型 fp8 精度下的实际表现，实际表现请参阅第二张对比图，为避免使用者误解，特提供第二张对比图，模型在不同精度下的表现是正常的)。 This model is the refine and quantized version of the model: https://huggingface.co/tencent/SRPO, it improve the clarity of the generated images and the compatibility of the models. (In below image, the SRPO-fp8 means load and quantized directly by Comf ...",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-13T05:29:39+00:00,[19],,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",text-to-image,text-to-image
2,tencent/SRPO,befox/SRPO-GGUF,model_quantized_model,3,model,model,"library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ...",,2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",,,,,text-to-image,
3,tencent/SRPO,wikeeyang/SRPO-for-ComfyUI,model_quantized_model,3,model,model,"library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ...","library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image language: - en base model: - tencent/SRPO =================================================================================== 本模型为 https://huggingface.co/tencent/SRPO 模型的 转换 和 8bit/4bit (fp8 e4m3fn/Q8 0/Q4 1) 量化版本，以适配 ComfyUI 用户环境正常加载和出图，保持原模型正常的出图效果。 This model is the converted and quantized version of the model: https://huggingface.co/tencent/SRPO, To adapt the ComfyUI environment for normal loading and output of images, maintaining the original model's normal effects. For bf16 version, Pls download it from: https://www.modelscope.cn/models/wikeeyang/SRPO-for-ComfyUI License Agreement Please fall under SRPO ...",2025-09-08T12:44:15+00:00,[19],"model_finetune_model:rockerBOO/flux.1-dev-SRPO, model_quantized_model:wikeeyang/SRPO-Refine-Quantized-v1.0, befox/SRPO-GGUF, wikeeyang/SRPO-for-ComfyUI","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-16T04:54:58+00:00,[19],,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",text-to-image,text-to-image
4,baidu/ERNIE-4.5-21B-A3B-Thinking,unsloth/ERNIE-4.5-21B-A3B-Thinking,model_finetune_model,0,model,model,"license: apache-2.0 language: - en - zh pipeline tag: text-generation tags: - ERNIE4.5 library name: transformers ERNIE-4.5-21B-A3B-Thinking Model Highlights Over the past three months, we have continued to scale the thinking capability of ERNIE-4.5-21B-A3B, improving both the quality and depth of reasoning, thereby advancing the competitiveness of ERNIE lightweight models in complex reasoning tasks. We are pleased to introduce ERNIE-4.5-21B-A3B-Thinking , featuring the following key enhancements: Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise. Efficient tool usage capabilities. Enhanced 128K long-context understanding capabilities. [!NOTE] Note ...","license: apache-2.0 language: - en - zh pipeline tag: text-generation tags: - ERNIE4.5 library name: transformers base model: baidu/ERNIE-4.5-21B-A3B-Thinking ERNIE-4.5-21B-A3B-Thinking Model Highlights Over the past three months, we have continued to scale the thinking capability of ERNIE-4.5-21B-A3B, improving both the quality and depth of reasoning, thereby advancing the competitiveness of ERNIE lightweight models in complex reasoning tasks. We are pleased to introduce ERNIE-4.5-21B-A3B-Thinking , featuring the following key enhancements: Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, text generation, and academic benchmarks that typically require human expertise. Efficient tool usage capabilities. Enhanced 128K long-con ...",2025-09-08T14:18:31+00:00,[0],"model_finetune_model:unsloth/ERNIE-4.5-21B-A3B-Thinking, model_quantized_model:unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF, gabriellarson/ERNIE-4.5-21B-A3B-Thinking-GGUF, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-8bit, cpatonn/ERNIE-4.5-21B-A3B-Thinking-AWQ-4bit, mradermacher/ERNIE-4.5-21B-A3B-Thinking-GGUF, nightmedia/ERNIE-4.5-21B-A3B-Thinking-mxfp4-mlx, wekW/ERNIE-4.5-21B-A3B-Thinking-Q8_0-GGUF","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2025-09-10T10:38:04+00:00,[0],,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",text-generation,text-generation


In [18]:
import os
import pandas as pd
import gc, torch, psutil

save_path = "/home/hice1/cxu371/scratch/final_edges_full_left_join.parquet"

# --- Try Parquet first (fast, compact) ---
try:
    edges_full_left_join.to_parquet(save_path, index=False)
    print(f"✅ Saved cleaned edges_full_left_join to Parquet:\n{save_path}")
except Exception as e:
    # Fallback to Pickle if Parquet libraries aren't available
    alt_path = save_path.replace(".parquet", ".pkl")
    edges_full_left_join.to_pickle(alt_path)
    save_path = alt_path
    print(f"⚠️ Parquet failed, saved as Pickle instead:\n{save_path}\nError: {e}")

# --- Check file size ---
if os.path.exists(save_path):
    size_mb = os.path.getsize(save_path) / 1e6
    print(f"📦 File size: {size_mb:.2f} MB")

# --- Optional: clear up memory (keeps encoder intact) ---
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print(f"✅ RAM after cleanup: {psutil.Process().memory_info().rss / 1e9:.2f} GB")


⚠️ Parquet failed, saved as Pickle instead:
/home/hice1/cxu371/scratch/final_edges_full_left_join.pkl
Error: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.
📦 File size: 438.25 MB
✅ RAM after cleanup: 5.89 GB


In [111]:
import pandas as pd

# Count appearances as source
src_counts = (
    edges_df.groupby(["source_node", "source_type"])
    .size()
    .reset_index(name="source_edge_count")
)

# Count appearances as destination
dst_counts = (
    edges_df.groupby(["dest_node", "dest_type"])
    .size()
    .reset_index(name="dest_edge_count")
)

# Merge both counts, filling missing with 0
node_counts = pd.merge(
    src_counts,
    dst_counts,
    left_on=["source_node", "source_type"],
    right_on=["dest_node", "dest_type"],
    how="outer"
).fillna(0)

# Consolidate to unified columns
node_counts["node"] = node_counts["source_node"].combine_first(node_counts["dest_node"])
node_counts["type"] = node_counts["source_type"].combine_first(node_counts["dest_type"])

# Total number of edges (as source + as dest)
node_counts["total_edges"] = node_counts["source_edge_count"] + node_counts["dest_edge_count"]

# Keep only relevant columns
node_counts = node_counts[["node", "type", "total_edges"]].sort_values(
    by="total_edges", ascending=False
).reset_index(drop=True)

display(node_counts.head(20))



Unnamed: 0,node,type,total_edges
0,Qwen/Qwen1.5-0.5B,model,30753.0
1,Qwen/Qwen1.5-1.8B,model,29059.0
2,google/gemma-2b,model,22578.0
3,google/gemma-7b,model,8929.0
4,distilbert/distilbert-base-uncased,model,6171.0
5,Qwen/Qwen1.5-7B,model,6118.0
6,stabilityai/stable-diffusion-xl-base-1.0,model,5516.0
7,black-forest-labs/FLUX.1-dev,model,5192.0
8,unsloth/llama-3-8b-bnb-4bit,model,2945.0
9,openai-community/gpt2,model,2677.0


In [115]:
percentiles = node_counts["total_edges"].quantile([0.25, 0.5, 0.75, 0.9, 0.99, 0.999, 1])
print("📈 Edge count percentiles for (node, type) pairs:")
for p, v in percentiles.items():
    print(f"{int(p*100)}% percentile: {v}")


📈 Edge count percentiles for (node, type) pairs:
25% percentile: 1.0
50% percentile: 1.0
75% percentile: 1.0
90% percentile: 2.0
99% percentile: 10.0
99% percentile: 93.0
100% percentile: 30753.0


In [20]:
# ---------- Step 3. Build node text (without relationships) ----------
def build_node_text(name, node_type, desc, pipeline_tag):
    """Build node representation text for triple construction."""
    parts = []
    name = str(name).strip() if isinstance(name, str) else ""
    node_type = str(node_type).strip() if isinstance(node_type, str) else ""
    desc = str(desc).strip() if isinstance(desc, str) else ""
    pipeline_tag = str(pipeline_tag).strip() if isinstance(pipeline_tag, str) else ""

    if name:
        parts.append(f"Node: {name}")
    if node_type:
        parts.append(f"Type: {node_type}")
    if pipeline_tag:
        parts.append(f"Pipeline: {pipeline_tag}")
    if desc:
        parts.append(f"Description: {desc}")

    return ". ".join(parts)


# ---------- Step 4. Build triples ----------
triples = []
total = len(edges_full_left_join)
print(f"\n🔗 Building triples for {total:,} edges...\n")

for idx, row in enumerate(
    tqdm(edges_full_left_join.itertuples(index=False), total=total, desc="Generating triples")
):
    src_text = build_node_text(row.source_node, row.source_type, row.source_description, row.source_pipeline_tag)
    dst_text = build_node_text(row.dest_node, row.dest_type, row.dest_description, row.dest_pipeline_tag)
    relation = str(row.edge_type).strip()
    triples.append((src_text, relation, dst_text))

    if idx % 5000 == 0:
        gc.collect()  # optional: free memory periodically

print(f"✅ Triples generated successfully! Total count: {len(triples):,}")




🔗 Building triples for 299,702 edges...



Generating triples: 100%|██████████| 299702/299702 [00:18<00:00, 15831.65it/s]

✅ Triples generated successfully! Total count: 299,702





In [21]:
torch.save(triples, "triples_cleaned.pt")
print("💾 Saved triples to 'triples_cleaned.pt'")
print("Example triple:")
print(triples[0])

💾 Saved triples to 'triples_cleaned.pt'
Example triple:
('Node: tencent/SRPO. Type: model. Pipeline: text-to-image. Description: library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ...', 'model_finetune_model', 'Node: rockerBOO/flux.1-dev-SRPO. Type: 

In [16]:
triples = torch.load("triples_cleaned.pt")

In [22]:
# turn triples into graph

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# creating the embedding model
sent_trans_batch_size = 256


print("Creating the graph data from raw triples...")
# create the graph data from raw triples
graph_data = create_graph_from_triples(
    triples=triples, embedding_model=encoder.encode,
    embedding_method_kwargs={
        "batch_size": min(len(triples), sent_trans_batch_size),
        "verbose": True
    }, pre_transform=preprocess_triplet)

Creating the graph data from raw triples...


Encoding 278062 strings w/ SentenceTransformer: 100%|██████████| 1087/1087 [26:32<00:00,  1.47s/it]
Encoding 5 strings w/ SentenceTransformer: 100%|██████████| 1/1 [00:00<00:00,  3.52it/s]


In [23]:
torch.save(graph_data, "final_graph_krystal.pt")
print("💾 Saved graph_data to 'final_graph_krystal.pt'")

💾 Saved graph_data to 'final_graph_krystal.pt'


In [21]:
graph_data = torch.load("final_graph_krystal.pt", weights_only=False)

In [22]:
print(graph_data)

Data(x=[278062, 768], edge_index=[2, 299702], edge_attr=[299702, 768], edge_id=[299702], node_id=[278062])


In [36]:
import gc
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print(f"✅ RAM after cleanup: {psutil.Process().memory_info().rss / 1e9:.2f} GB")


✅ RAM after cleanup: 9.54 GB


In [51]:
# creating the graph and feature stores
fs, gs = create_remote_backend_from_graph_data(
    graph_data=graph_data, path="backend",
    graph_db=NeighborSamplingRAGGraphStore,
    feature_db=KNNRAGFeatureStore).load()

In [52]:
subgraph_filter = make_pcst_filter(
    triples,
    encoder,
    topk=12,         # modest node selection
    topk_e=35,       # allow moderate edge budget
    cost_e=0.45,     # make edges moderately costly
    num_clusters=5   # allow fragmentation for superhubs
)

fanout = 80         # max neighbors per hop
num_hops = 2        # keep expansion shallow

query_loader_config = {
    "k_nodes": 512,  # reduced max KNN retrieval per query
    "num_neighbors": [fanout] * num_hops,
    "encoder_model": encoder,
}

query_loader = RAGQueryLoader(
    graph_data=(fs, gs),
    subgraph_filter=subgraph_filter,
    config=query_loader_config
)


In [53]:
subgraph = query_loader.query('what are some of the tasks for model tencent/SRPO?')



In [54]:
subgraph

Data(x=[2, 768], edge_index=[2, 6], edge_attr=[6, 768], node_idx=[2], edge_idx=[6], desc='node_id,node_attr
0,"Node: tencent/SRPO. Type: model. Pipeline: text-to-image. Description: library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ..."
1,"Node: roc

In [55]:
import pandas as pd
import gc
import torch

# --- Step 1. Extract edge info safely ---
src_s = subgraph.edge_index[0].cpu().numpy()
dst_s = subgraph.edge_index[1].cpu().numpy()
edge_attr_hash_s = [hash(tuple(e.cpu().tolist())) for e in subgraph.edge_attr]

subgraph_edges_tmp = pd.DataFrame({
    "src": src_s,
    "edge_hash": edge_attr_hash_s,
    "dst": dst_s
})

# --- Step 2. Detect duplicates ---
dupe_mask_s = subgraph_edges_tmp.duplicated(subset=["src", "edge_hash", "dst"], keep=False)
num_dupes = dupe_mask_s.sum()
total_edges = len(subgraph_edges_tmp)

print(f"🔍 Total edges in subgraph: {total_edges:,}")
print(f"⚠️ Duplicate edges found: {num_dupes:,}")
print(f"✅ Unique edges: {total_edges - num_dupes:,}")

if num_dupes > 0:
    print("\nExamples of duplicated edges:")
    display(subgraph_edges_tmp[dupe_mask_s].head(5))

# --- Step 3. Cleanup ---
del src_s, dst_s, edge_attr_hash_s, dupe_mask_s, subgraph_edges_tmp
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

try:
    import psutil
    print(f"✅ RAM after cleanup: {psutil.virtual_memory().available / 1e9:.2f} GB")
except ImportError:
    print("✅ Memory cleanup complete.")


🔍 Total edges in subgraph: 6
⚠️ Duplicate edges found: 6
✅ Unique edges: 0

Examples of duplicated edges:


Unnamed: 0,src,edge_hash,dst
0,0,-37623898268095519,1
1,0,-37623898268095519,1
2,0,-37623898268095519,1
3,0,-37623898268095519,1
4,0,-37623898268095519,1


✅ RAM after cleanup: 176.11 GB


In [41]:
# def adaptive_query_loader(fs, gs, triples, encoder):
#     """Create a query_loader with settings that scale with graph density."""
#     num_edges = len(triples)
#     num_nodes = len(set([t[0] for t in triples] + [t[2] for t in triples]))
#     edge_density = num_edges / max(num_nodes, 1)

#     # --- Adaptive thresholds ---
#     if edge_density < 2:  # very sparse
#         topk, topk_e, cost_e, num_clusters = 10, 25, 0.5, 6
#         fanout, num_hops, k_nodes = 60, 2, 512
#     elif edge_density < 5:  # medium
#         topk, topk_e, cost_e, num_clusters = 15, 40, 0.4, 4
#         fanout, num_hops, k_nodes = 120, 3, 768
#     else:  # dense graph
#         topk, topk_e, cost_e, num_clusters = 25, 60, 0.25, 2
#         fanout, num_hops, k_nodes = 200, 3, 1024

#     print(f"📊 Graph density = {edge_density:.2f}")
#     print(f"➡️  Using adaptive config: topk={topk}, topk_e={topk_e}, cost_e={cost_e}, "
#           f"num_clusters={num_clusters}, fanout={fanout}, hops={num_hops}, k_nodes={k_nodes}")

#     subgraph_filter = make_pcst_filter(
#         triples,
#         encoder,
#         topk=topk,
#         topk_e=topk_e,
#         cost_e=cost_e,
#         num_clusters=num_clusters
#     )

#     query_loader_config = {
#         "k_nodes": k_nodes,
#         "num_neighbors": [fanout] * num_hops,
#         "encoder_model": encoder,
#     }

#     return RAGQueryLoader(
#         graph_data=(fs, gs),
#         subgraph_filter=subgraph_filter,
#         config=query_loader_config
#     )




In [42]:
# query_loader = adaptive_query_loader(fs, gs, triples, encoder)

📊 Graph density = 1.08
➡️  Using adaptive config: topk=10, topk_e=25, cost_e=0.5, num_clusters=6, fanout=60, hops=2, k_nodes=512


In [43]:
# subgraph = query_loader.query('what are some of the tasks for model tencent/SRPO?')



In [44]:
# subgraph

Data(x=[2, 768], edge_index=[2, 7], edge_attr=[7, 768], node_idx=[2], edge_idx=[7], desc='node_id,node_attr
0,"Node: tencent/SRPO. Type: model. Pipeline: text-to-image. Description: library name: diffusers license: other license name: tencent-hunyuan-community license link: https://github.com/Tencent-Hunyuan/SRPO/blob/main/LICENSE.txt pipeline tag: text-to-image Directly Aligning the Full Diffusion Trajectory with Fine-Grained Human Preference &nbsp; &nbsp; &nbsp; Xiangwei Shen 1,2 , Zhimin Li 1 , Zhantao Yang 1 , Shiyi Zhang 3 , Yingfang Zhang 1 , Donghao Li 1 , Chunyu Wang 1 , Qinglin Lu 1 , Yansong Tang 3,✝ 1 Hunyuan, Tencent 2 School of Science and Engineering, The Chinese University of Hong Kong, Shenzhen 3 Shenzhen International Graduate School, Tsinghua University Equal contribution ✝ Corresponding author Abstract Recent studies have demonstrated the effectiveness of directly aligning diffusion models with human preferences using differentiable reward. However, ..."
1,"Node: roc

In [47]:
# subgraph = query_loader.query('Show me all the models that have edges with model Qwen/Qwen1.5-0.5B?')

In [48]:
# subgraph

Data(x=[3, 768], edge_index=[2, 478], edge_attr=[478, 768], node_idx=[3], edge_idx=[478], desc='node_id,node_attr
19792,Node: hendra01/qwen-2.5-finetuned. Type: model
19793,Node: binh230/qwen2.5-finetuned. Type: model
19702,"Node: Qwen/Qwen2.5-7B-Instruct. Type: model. Pipeline: text-generation. Description: license: apache-2.0 license link: https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/blob/main/LICENSE language: - en pipeline tag: text-generation base model: Qwen/Qwen2.5-7B tags: - chat library name: transformers Qwen2.5-7B-Instruct Introduction Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and mathematics , thanks to our specialized expert models in these domains. - Significant improvements in instruc

In [50]:
import torch
import pandas as pd
import gc

# --- Step 1. Extract edge data safely ---
src = graph_data.edge_index[0].cpu().numpy()
dst = graph_data.edge_index[1].cpu().numpy()

# Create hashable fingerprints for edge_attr (embeddings)
edge_attr_hash = [hash(tuple(e.cpu().tolist())) for e in graph_data.edge_attr]

graph_edges_tmp = pd.DataFrame({
    "src": src,
    "edge_hash": edge_attr_hash,
    "dst": dst
})

# --- Step 2. Detect duplicates ---
dupe_mask_tmp = graph_edges_tmp.duplicated(subset=["src", "edge_hash", "dst"], keep=False)
num_dupes = dupe_mask_tmp.sum()
total_edges = len(graph_edges_tmp)

print(f"🔍 Total edges: {total_edges:,}")
print(f"⚠️ Duplicate edges found: {num_dupes:,}")
print(f"✅ Unique edges: {total_edges - num_dupes:,}")

if num_dupes > 0:
    print("\nExamples of duplicated edges:")
    display(graph_edges_tmp[dupe_mask_tmp].head(5))

# --- Step 3. Cleanup ---
del src, dst, edge_attr_hash, dupe_mask_tmp, graph_edges_tmp
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Optional RAM check
try:
    import psutil
    print(f"✅ RAM after cleanup: {psutil.virtual_memory().available / 1e9:.2f} GB")
except ImportError:
    print("✅ Memory cleanup complete.")



🔍 Total edges: 299,702
⚠️ Duplicate edges found: 0
✅ Unique edges: 299,702
✅ RAM after cleanup: 176.34 GB
