
Commit e06ad57

[platform] Allow platform to specify attention backend

committed Dec 30, 2024
1 parent 0aa38d1 commit e06ad57

2 files changed: +15 -1 lines
 

vllm/attention/selector.py (+9)

@@ -168,6 +168,15 @@ def _cached_get_attn_backend(
             PlaceholderAttentionBackend)
         return PlaceholderAttentionBackend
     else:
+        # If the backend is not specified, it may be a plugin platform. Use the
+        # default backend impl from it instead.
+        impl = current_platform.get_default_attn_backend_impl()
+        if impl:
+            assert callable(impl), (
+                "The default attention backend implementation is not callable, "
+                f"platform: {current_platform.device_name}")
+            return impl
+
         raise ValueError("Invalid attention backend.")
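With this fall-through in place, a platform shipped by an out-of-tree plugin can supply its attention backend even when no in-tree _Backend value matches. A minimal sketch of such a platform follows; it is not part of this commit, and the plugin, module, and backend names (my_plugin, MyAcceleratorPlatform, MyAttentionBackend) are hypothetical.

# Hypothetical out-of-tree platform using the hook added by this commit.
# Only Platform and get_default_attn_backend_impl come from vLLM; every
# other name here is an assumption for illustration.
from typing import Callable, Optional

from vllm.platforms.interface import Platform


class MyAcceleratorPlatform(Platform):
    device_name: str = "my_accelerator"

    @classmethod
    def get_default_attn_backend_impl(cls) -> Optional[Callable]:
        # Import lazily so the platform can be loaded without pulling in
        # the backend's dependencies at import time.
        from my_plugin.attention import MyAttentionBackend
        # Return the backend class itself; _cached_get_attn_backend asserts
        # callable(impl) before handing it back to the caller.
        return MyAttentionBackend

Returning the class rather than an instance matches the callable(impl) assertion in the selector, which passes impl back for the caller to use like any other attention backend class.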
vllm/platforms/interface.py (+6 -1)

@@ -2,7 +2,7 @@
 import platform
 import random
 from platform import uname
-from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, Tuple, Union

 import numpy as np
 import torch
@@ -116,6 +116,11 @@ def get_default_attn_backend(cls, selected_backend: _Backend):
         """Get the default attention backend of a device."""
         return None

+    @classmethod
+    def get_default_attn_backend_impl(cls) -> Optional[Callable]:
+        """Get the default attention backend implementation of a device."""
+        return None
+
     @classmethod
     def get_device_capability(
         cls,

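For the selector's comment about "a plugin platform" to apply, the plugin's platform class also has to become current_platform. A rough sketch of how a plugin package might make that happen, assuming vLLM's platform plugins are discovered through an entry-point group named vllm.platform_plugins whose hook returns the platform's class path (the group name and return convention are assumptions about the plugin mechanism, not something this commit shows; all package names are hypothetical):

# my_plugin/__init__.py (hypothetical). Registered in the plugin package's
# setup.py as:
#     entry_points={"vllm.platform_plugins": ["my_accelerator = my_plugin:register"]}
# The entry-point group name and the return convention are assumptions
# about vLLM's plugin discovery, not shown by this commit.
from typing import Optional


def register() -> Optional[str]:
    # Return the fully qualified platform class path when this plugin's
    # device is available, else None so another platform can be chosen.
    return "my_plugin.platform.MyAcceleratorPlatform"

Once current_platform resolves to that class, the new branch in _cached_get_attn_backend returns its backend instead of raising ValueError.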
0 commit comments