Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 3 additions & 24 deletions fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@
from fastdeploy.model_executor.utils import (
TensorTracker,
default_weight_loader,
free_tensor,
process_weight_transpose,
set_weight_attrs,
weight_fully_copied,
)
from fastdeploy.platforms import current_platform

Expand Down Expand Up @@ -312,25 +311,5 @@ def create_weights(self, layer: nn.Layer, **extra_weight_attrs):
def process_weights_after_loading(self, layer):
if self.model_format != "torch":
return
if not weight_fully_copied(layer.up_gate_proj_weight) or not weight_fully_copied(layer.down_proj_weight):
return
up_gate_proj_weight_transpose = layer.up_gate_proj_weight.transpose([0, 2, 1])
down_proj_weight_transpose = layer.down_proj_weight.transpose([0, 2, 1])
up_gate_proj = layer.create_parameter(
shape=up_gate_proj_weight_transpose.shape,
dtype=up_gate_proj_weight_transpose.dtype,
default_initializer=paddle.nn.initializer.Normal(mean=0.0, std=0.02),
is_bias=False,
)
up_gate_proj.copy_(up_gate_proj_weight_transpose, False)
free_tensor(layer.up_gate_proj_weight)
layer.up_gate_proj_weight = up_gate_proj
down_proj = layer.create_parameter(
shape=down_proj_weight_transpose.shape,
dtype=down_proj_weight_transpose.dtype,
default_initializer=paddle.nn.initializer.Normal(mean=0.0, std=0.02),
is_bias=False,
)
down_proj.copy_(down_proj_weight_transpose, False)
free_tensor(layer.down_proj_weight)
layer.down_proj_weight = down_proj
process_weight_transpose(layer, "up_gate_proj_weight")
process_weight_transpose(layer, "down_proj_weight")
Loading