
Commit e06ad57

[platform] Allow platform to specify attention backend

committed Dec 30, 2024
1 parent 0aa38d1 commit e06ad57

2 files changed: +15 -1 lines
 

vllm/attention/selector.py (+9)

@@ -168,6 +168,15 @@ def _cached_get_attn_backend(
             PlaceholderAttentionBackend)
         return PlaceholderAttentionBackend
     else:
+        # If the backend is not specified, it may be a plugin platform. Use the
+        # default backend impl from it instead.
+        impl = current_platform.get_default_attn_backend_impl()
+        if impl:
+            assert callable(impl), (
+                "The default attention backend implementation is not callable, "
+                f"platform: {current_platform.device_name}")
+            return impl
+
         raise ValueError("Invalid attention backend.")
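With this fall-through in place, a platform shipped by an out-of-tree plugin can supply its attention backend even when no in-tree _Backend value matches. A minimal sketch of such a platform follows; it is not part of this commit, and the plugin, module, and backend names (my_plugin, MyAcceleratorPlatform, MyAttentionBackend) are hypothetical.

# Hypothetical out-of-tree platform using the hook added by this commit.
# Only Platform and get_default_attn_backend_impl come from vLLM; every
# other name here is an assumption for illustration.
from typing import Callable, Optional

from vllm.platforms.interface import Platform


class MyAcceleratorPlatform(Platform):
    device_name: str = "my_accelerator"

    @classmethod
    def get_default_attn_backend_impl(cls) -> Optional[Callable]:
        # Import lazily so the platform can be loaded without pulling in
        # the backend's dependencies at import time.
        from my_plugin.attention import MyAttentionBackend
        # Return the backend class itself; _cached_get_attn_backend asserts
        # callable(impl) before handing it back to the caller.
        return MyAttentionBackend

Returning the class rather than an instance matches the callable(impl) assertion in the selector, which passes impl back for the caller to use like any other attention backend class.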
vllm/platforms/interface.py (+6 -1)

@@ -2,7 +2,7 @@
 import platform
 import random
 from platform import uname
-from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, Tuple, Union

 import numpy as np
 import torch
@@ -116,6 +116,11 @@ def get_default_attn_backend(cls, selected_backend: _Backend):
         """Get the default attention backend of a device."""
         return None

+    @classmethod
+    def get_default_attn_backend_impl(cls) -> Optional[Callable]:
+        """Get the default attention backend implementation of a device."""
+        return None
+
     @classmethod
     def get_device_capability(
         cls,

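For the selector's comment about "a plugin platform" to apply, the plugin's platform class also has to become current_platform. A rough sketch of how a plugin package might make that happen, assuming vLLM's platform plugins are discovered through an entry-point group named vllm.platform_plugins whose hook returns the platform's class path (the group name and return convention are assumptions about the plugin mechanism, not something this commit shows; all package names are hypothetical):

# my_plugin/__init__.py (hypothetical). Registered in the plugin package's
# setup.py as:
#     entry_points={"vllm.platform_plugins": ["my_accelerator = my_plugin:register"]}
# The entry-point group name and the return convention are assumptions
# about vLLM's plugin discovery, not shown by this commit.
from typing import Optional


def register() -> Optional[str]:
    # Return the fully qualified platform class path when this plugin's
    # device is available, else None so another platform can be chosen.
    return "my_plugin.platform.MyAcceleratorPlatform"

Once current_platform resolves to that class, the new branch in _cached_get_attn_backend returns its backend instead of raising ValueError.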
0 commit comments