diff --git a/fastdeploy/model_executor/layers/backends/xpu/moe/fused_moe.py b/fastdeploy/model_executor/layers/backends/xpu/moe/fused_moe.py index 89af18a1b01..b8a8b6457d0 100644 --- a/fastdeploy/model_executor/layers/backends/xpu/moe/fused_moe.py +++ b/fastdeploy/model_executor/layers/backends/xpu/moe/fused_moe.py @@ -644,7 +644,7 @@ def apply_tp( layer.down_proj_weight, None, # moe_ffn1_bias None, # moe_ffn2_bias - getattr(layer, "up_gate_proj_in_scale", None), + (ffn1_act_scale_per_token if hasattr(layer, "up_gate_proj_in_scale") else None), getattr(layer, "down_proj_in_scale", None), getattr(layer, "up_gate_proj_weight_scale", None), getattr(layer, "down_proj_weight_scale", None),