forked from vllm-project/vllm
Commit 288a938 (1 parent: e39ebf5)
[Doc] Indicate more information about supported modalities (vllm-proj…
Showing 7 changed files with 206 additions and 51 deletions.
New file (+95 lines):
""" | ||
This example shows how to use vLLM for running offline inference with | ||
multi-image input on vision language models, using the chat template defined | ||
by the model. | ||
""" | ||
from argparse import Namespace | ||
from typing import List | ||
|
||
from vllm import LLM | ||
from vllm.multimodal.utils import fetch_image | ||
from vllm.utils import FlexibleArgumentParser | ||
|
||
QUESTION = "What is the content of each image?" | ||
IMAGE_URLS = [ | ||
"https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg", | ||
"https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg", | ||
] | ||
|
||
|
||
def _load_phi3v(image_urls: List[str]): | ||
return LLM( | ||
model="microsoft/Phi-3.5-vision-instruct", | ||
trust_remote_code=True, | ||
max_model_len=4096, | ||
limit_mm_per_prompt={"image": len(image_urls)}, | ||
) | ||
|
||
|
||
def run_phi3v_generate(question: str, image_urls: List[str]): | ||
llm = _load_phi3v(image_urls) | ||
|
||
placeholders = "\n".join(f"<|image_{i}|>" | ||
for i, _ in enumerate(image_urls, start=1)) | ||
prompt = f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n" | ||
|
||
outputs = llm.generate({ | ||
"prompt": prompt, | ||
"multi_modal_data": { | ||
"image": [fetch_image(url) for url in image_urls] | ||
}, | ||
}) | ||
|
||
for o in outputs: | ||
generated_text = o.outputs[0].text | ||
print(generated_text) | ||
|
||
|
||
def run_phi3v_chat(question: str, image_urls: List[str]): | ||
llm = _load_phi3v(image_urls) | ||
|
||
outputs = llm.chat([{ | ||
"role": | ||
"user", | ||
"content": [ | ||
{ | ||
"type": "text", | ||
"text": question, | ||
}, | ||
*({ | ||
"type": "image_url", | ||
"image_url": { | ||
"url": image_url | ||
}, | ||
} for image_url in image_urls), | ||
], | ||
}]) | ||
|
||
for o in outputs: | ||
generated_text = o.outputs[0].text | ||
print(generated_text) | ||
|
||
|
||
def main(args: Namespace): | ||
method = args.method | ||
|
||
if method == "generate": | ||
run_phi3v_generate(QUESTION, IMAGE_URLS) | ||
elif method == "chat": | ||
run_phi3v_chat(QUESTION, IMAGE_URLS) | ||
else: | ||
raise ValueError(f"Invalid method: {method}") | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = FlexibleArgumentParser( | ||
description='Demo on using vLLM for offline inference with ' | ||
'vision language models that support multi-image input') | ||
parser.add_argument("--method", | ||
type=str, | ||
default="generate", | ||
choices=["generate", "chat"], | ||
help="The method to run in `vllm.LLM`.") | ||
|
||
args = parser.parse_args() | ||
main(args) |
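
For reference, one way to try the new example once it is saved (the file path is not shown in this diff, so the name below is illustrative):

    python offline_inference_vision_language_multi_image.py --method chat

`--method generate` exercises the hand-built Phi-3.5-vision prompt string, while `--method chat` hands OpenAI-style messages to `llm.chat` and lets vLLM apply the model's chat template; the two paths should produce comparable answers for the same question and images.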