Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 93 additions & 63 deletions fastdeploy/cache_manager/ops.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,107 @@
"""
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR Title Missing Required Tag: The PR title "dummy import fd" does not follow the required format of [CLASS]Title. According to the coding guidelines and checklist, it should include a tag such as [BugFix], [Feature], [Optimization], etc. For example: "[BugFix] Add fallback handling for import errors" or "[Feature] Add error handling for platform-specific imports".

Copilot generated this review using guidance from repository custom instructions.
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR Description Incomplete: The PR description does not explain the motivation or modifications being made. According to the coding guidelines, the description should at minimum explain why these modifications are being made and what problem is being solved. Please provide:

  1. Motivation: Why is the try-except block being added?
  2. Modifications: What specific issue does wrapping the imports in try-except solve?
  3. Usage/Command: How should this change be used or tested?

Copilot generated this review using guidance from repository custom instructions.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra space in copyright header: There are two spaces between "2025" and "PaddlePaddle" instead of one.

Suggested change
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.

Copilot uses AI. Check for mistakes.
#
# Licensed under the Apache License, Version 2.0 (the "License"
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing semicolon in license header: The Apache License header comment should end with ");" not just ")".

Suggested change
# Licensed under the Apache License, Version 2.0 (the "License"
# Licensed under the Apache License, Version 2.0 (the "License");

Copilot uses AI. Check for mistakes.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import paddle

from fastdeploy.platforms import current_platform

# Resolve the platform-specific KV-cache ops once at import time so the rest
# of the cache manager can call a uniform set of names regardless of backend.
if current_platform.is_cuda():
    from fastdeploy.model_executor.ops.gpu import (
        cuda_host_alloc,
        cuda_host_free,
        get_data_ptr_ipc,
        get_output_kv_signal,
        ipc_sent_key_value_cache_by_remote_ptr,
        ipc_sent_key_value_cache_by_remote_ptr_block_sync,
        set_data_ipc,
        share_external_data,
        swap_cache_all_layers,
        unset_data_ipc,
    )

    memory_allocated = paddle.device.cuda.memory_allocated

    def get_peer_mem_addr(*args, **kwargs):
        """Peer memory addresses are an XPU-only concept; fail loudly on CUDA."""
        raise RuntimeError("CUDA no need of get_peer_mem_addr!")

elif current_platform.is_xpu():
    from fastdeploy.model_executor.ops.xpu import (
        cuda_host_alloc,
        cuda_host_free,
        get_output_kv_signal,
        get_peer_mem_addr,
        set_data_ipc,
        share_external_data,
        swap_cache_all_layers,
    )

    # XPU provides no unset_data_ipc kernel; expose a None sentinel so callers
    # can detect the missing capability before use.
    unset_data_ipc = None
    memory_allocated = paddle.device.xpu.memory_allocated

    def get_data_ptr_ipc(*args, **kwargs):
        """Not implemented on XPU; raise instead of failing silently."""
        raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")

    def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
        """Not implemented on XPU; raise instead of failing silently."""
        raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

    def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
        """Not implemented on XPU; raise instead of failing silently."""
        raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED")

else:
    # No other backends ship prefix-cache kernels.
    raise RuntimeError("Prefix cache ops only support CUDA and XPU platforms")


def set_device(device):
try:
if current_platform.is_cuda():
paddle.set_device(f"gpu:{device}")
from fastdeploy.model_executor.ops.gpu import (
cuda_host_alloc,
cuda_host_free,
get_data_ptr_ipc,
get_output_kv_signal,
ipc_sent_key_value_cache_by_remote_ptr,
ipc_sent_key_value_cache_by_remote_ptr_block_sync,
set_data_ipc,
share_external_data,
swap_cache_all_layers,
unset_data_ipc,
)

memory_allocated = paddle.device.cuda.memory_allocated

def get_peer_mem_addr(*args, **kwargs):
    """Reject calls on CUDA: peer memory addresses are only meaningful on XPU."""
    message = "CUDA no need of get_peer_mem_addr!"
    raise RuntimeError(message)

elif current_platform.is_xpu():
paddle.set_device(f"xpu:{device}")
else:
raise RuntimeError("No supported platform")
from fastdeploy.model_executor.ops.xpu import (
cuda_host_alloc,
cuda_host_free,
get_output_kv_signal,
get_peer_mem_addr,
set_data_ipc,
share_external_data,
swap_cache_all_layers,
)

unset_data_ipc = None
memory_allocated = paddle.device.xpu.memory_allocated

def share_external_data_(cache, cache_name, cache_shape, use_ipc):
    """Share an externally allocated cache tensor via the platform kernel.

    The XPU kernel takes an extra use_ipc flag; the CUDA kernel does not.
    Raises RuntimeError on any other platform.
    """
    if current_platform.is_cuda():
        return share_external_data(cache, cache_name, cache_shape)
    if current_platform.is_xpu():
        return share_external_data(cache, cache_name, cache_shape, use_ipc)
    raise RuntimeError("No supported platform")
def get_data_ptr_ipc(*args, **kwargs):
    """Not implemented on XPU; raise instead of failing silently."""
    # Fixed typo: "UNIMPLENENTED" -> "UNIMPLEMENTED".
    raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")

def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
    """Not implemented on XPU; raise instead of failing silently."""
    raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
    """Not implemented on XPU; raise instead of failing silently."""
    # Message previously named the wrong function; it now names this one.
    raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED")
Comment on lines +56 to +62
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling error: "UNIMPLENENTED" should be "UNIMPLEMENTED".

Suggested change
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLENENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

Copilot uses AI. Check for mistakes.
Comment on lines +56 to +62
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling error: "UNIMPLENENTED" should be "UNIMPLEMENTED".

Suggested change
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLENENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

Copilot uses AI. Check for mistakes.
Comment on lines +56 to +62
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling error: "UNIMPLENENTED" should be "UNIMPLEMENTED".

Suggested change
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLENENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

Copilot uses AI. Check for mistakes.

def get_all_visible_devices():
    """Return the environment-variable assignment exposing devices 0-7."""
    if current_platform.is_xpu():
        return "XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
    # Every non-XPU platform falls back to the CUDA variable.
    return "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
raise RuntimeError("Prefix cache ops only supported CUDA nor XPU platform ")

def set_device(device):
    """Bind the active Paddle device to the given ordinal on this platform.

    Raises RuntimeError when the platform is neither CUDA nor XPU.
    """
    if current_platform.is_cuda():
        target = f"gpu:{device}"
    elif current_platform.is_xpu():
        target = f"xpu:{device}"
    else:
        raise RuntimeError("No supported platform")
    paddle.set_device(target)

def share_external_data_(cache, cache_name, cache_shape, use_ipc):
    """Share an externally allocated cache tensor via the platform kernel.

    The XPU kernel additionally takes the use_ipc flag; the CUDA kernel
    takes only (cache, name, shape). Raises RuntimeError on other platforms.
    """
    if current_platform.is_cuda():
        cache = share_external_data(cache, cache_name, cache_shape)
    elif current_platform.is_xpu():
        cache = share_external_data(cache, cache_name, cache_shape, use_ipc)
    else:
        raise RuntimeError("No supported platform")
    return cache

def get_all_visible_devices():
    # Returns the environment-variable assignment that makes devices 0-7
    # visible on the current platform (XPU vs CUDA).
    # NOTE(review): hard-codes exactly 8 devices — confirm against deployment.
    if current_platform.is_xpu():
        return "XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
    else:
        return "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"

except:
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bare except clause is a bad practice: Using a bare except: catches all exceptions including SystemExit, KeyboardInterrupt, and other exceptions that should typically propagate. This can mask unexpected errors and make debugging difficult.

Consider catching specific exceptions instead:

except (ImportError, ModuleNotFoundError, AttributeError) as e:
    # Log the error for debugging
    # Then set fallback values

This would catch import-related errors while allowing critical exceptions to propagate properly.

Copilot uses AI. Check for mistakes.
# Fallback when the platform-specific op imports fail: expose every op name
# as None so this module can still be imported (e.g. a dummy import without
# compiled kernels). Callers must check for None before invoking any of these.
# NOTE(review): the failure is swallowed silently — consider logging the
# original exception so missing-kernel setups are diagnosable.
cuda_host_alloc = None
cuda_host_free = None
set_data_ipc = None
share_external_data_ = None
swap_cache_all_layers = None
unset_data_ipc = None
set_device = None
memory_allocated = None
get_output_kv_signal = None
get_data_ptr_ipc = None
ipc_sent_key_value_cache_by_remote_ptr = None
ipc_sent_key_value_cache_by_remote_ptr_block_sync = None
get_peer_mem_addr = None
get_all_visible_devices = None
Comment on lines +91 to +104
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Silent failure with no logging: The except block silently sets all functions to None without any logging or warning. This makes it extremely difficult to diagnose issues when imports fail.

Consider adding logging to help with debugging:

except (ImportError, ModuleNotFoundError) as e:
    import logging
    logger = logging.getLogger(__name__)
    logger.warning(f"Failed to import platform-specific ops: {e}. Setting fallback values.")
    # Set None values

This would help developers and users understand why operations are unavailable.

Copilot uses AI. Check for mistakes.

Comment on lines +91 to 105
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting functions to None can cause AttributeError at runtime: When these functions are set to None in the except block, any code that tries to call them (e.g., cuda_host_alloc()) will fail with TypeError: 'NoneType' object is not callable. This defers error detection to runtime rather than providing clear feedback at import time.

Consider either:

  1. Raising a clear error at import time if these operations are critical
  2. Creating stub functions that raise informative errors when called:
def _create_stub(name):
    def stub(*args, **kwargs):
        raise RuntimeError(f"{name} is not available: platform-specific ops failed to import")
    return stub

# Then in the except block:
cuda_host_alloc = _create_stub("cuda_host_alloc")
cuda_host_free = _create_stub("cuda_host_free")
# etc.

This provides better error messages when the functions are actually used.

Suggested change
cuda_host_alloc = None
cuda_host_free = None
set_data_ipc = None
share_external_data_ = None
swap_cache_all_layers = None
unset_data_ipc = None
set_device = None
memory_allocated = None
get_output_kv_signal = None
get_data_ptr_ipc = None
ipc_sent_key_value_cache_by_remote_ptr = None
ipc_sent_key_value_cache_by_remote_ptr_block_sync = None
get_peer_mem_addr = None
get_all_visible_devices = None
def _create_stub(name):
def stub(*args, **kwargs):
raise RuntimeError(f"{name} is not available: platform-specific ops failed to import")
return stub
cuda_host_alloc = _create_stub("cuda_host_alloc")
cuda_host_free = _create_stub("cuda_host_free")
set_data_ipc = _create_stub("set_data_ipc")
share_external_data_ = _create_stub("share_external_data_")
swap_cache_all_layers = _create_stub("swap_cache_all_layers")
unset_data_ipc = _create_stub("unset_data_ipc")
set_device = _create_stub("set_device")
memory_allocated = _create_stub("memory_allocated")
get_output_kv_signal = _create_stub("get_output_kv_signal")
get_data_ptr_ipc = _create_stub("get_data_ptr_ipc")
ipc_sent_key_value_cache_by_remote_ptr = _create_stub("ipc_sent_key_value_cache_by_remote_ptr")
ipc_sent_key_value_cache_by_remote_ptr_block_sync = _create_stub("ipc_sent_key_value_cache_by_remote_ptr_block_sync")
get_peer_mem_addr = _create_stub("get_peer_mem_addr")
get_all_visible_devices = _create_stub("get_all_visible_devices")

Copilot uses AI. Check for mistakes.

__all__ = [
Expand Down
Loading