In [281]:
from typing import (
    Any,
    Optional,
    Literal,
    Dict,
    Type,
    TypeVar,
    Union,
    cast,
    Sequence,
    Callable,
)
from operator import itemgetter

from pydantic import BaseModel
from langchain_core.runnables import (
    Runnable,
    RunnableConfig,
    ensure_config,
    RunnablePassthrough,
    RunnableMap,
    chain,
)
from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.tools import BaseTool
from langchain_community.llms.vllm import VLLMOpenAI
from langchain_core.utils.pydantic import (
    PydanticBaseModel,
    TypeBaseModel,
    is_basemodel_subclass,
)
from langchain_core.utils.function_calling import (
    convert_to_openai_tool,
    convert_to_openai_function,
)
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
)

# Type variable standing for any pydantic ``BaseModel`` subclass.
_BM = TypeVar("_BM", bound=BaseModel)
# Accepted schema inputs: a plain dict, a pydantic model class, or any other class.
# Used below as the annotation of the ``schema`` argument of ``with_structured_output``.
_DictOrPydanticClass = Union[Dict[str, Any], Type[_BM], Type]
# Structured-output result: either a plain dict or a pydantic model instance.
_DictOrPydantic = Union[Dict, _BM]

In [282]:
def _is_pydantic_class(obj: Any) -> bool:
    return isinstance(obj, type) and is_basemodel_subclass(obj)

In [283]:
def _convert_to_openai_response_format(
    schema: Union[Dict[str, Any], Type], strict: bool
) -> Union[Dict, TypeBaseModel]:
    """Turn a schema into the value bound as ``response_format``.

    Args:
        schema: A pydantic model class, or anything accepted by
            ``convert_to_openai_function``.
        strict: Forwarded to ``convert_to_openai_function``.

    Returns:
        The class itself when ``schema`` is a pydantic model class; otherwise a
        ``{"type": "json_schema", "json_schema": ...}`` dict built from the
        converted function description.
    """
    # Pydantic model classes are handed back untouched.
    if isinstance(schema, type) and is_basemodel_subclass(schema):
        return schema

    json_schema = convert_to_openai_function(schema, strict=strict)
    # The function description stores the schema under "parameters"; the
    # response-format payload built here stores it under "schema".
    json_schema["schema"] = json_schema.pop("parameters")
    return {"type": "json_schema", "json_schema": json_schema}

In [284]:
@chain
def _oai_structured_outputs_parser(ai_msg: AIMessage) -> PydanticBaseModel:
    """Pull the parsed structured output out of an AI message.

    Args:
        ai_msg: Message whose ``additional_kwargs`` may carry a ``"parsed"``
            value or a ``"refusal"`` value.

    Returns:
        The truthy value stored under ``additional_kwargs["parsed"]``.

    Raises:
        ValueError: Carrying the refusal value when a truthy ``"refusal"`` is
            present, or a diagnostic that includes the message itself when
            neither field is set.
    """
    extras = ai_msg.additional_kwargs
    if extras.get("parsed"):
        return extras["parsed"]
    if extras.get("refusal"):
        raise ValueError(extras["refusal"])
    # The second literal must be an f-string, otherwise the text "{ai_msg}" is
    # emitted verbatim instead of the offending message.
    raise ValueError(
        "Structured Output response does not have a 'parsed' field nor a 'refusal' "
        f"field. Received message:\n\n{ai_msg}"
    )

In [285]:
# Pydantic schema with a single required string field. It is the schema passed to
# ``with_structured_output`` in the commented-out experiment further down.
# NOTE(review): documented with comments rather than a class docstring because a
# docstring on a pydantic model may end up in the generated schema/tool
# description — confirm before converting.
class Summary(BaseModel):
    # The summary text.
    summary: str

In [286]:
class VLLMClient(VLLMOpenAI):
    """
    A wrapper around the VLLMOpenAI that supports the langchain interface.

    On top of ``VLLMOpenAI`` this class:

    * overrides ``ainvoke`` so the generated text comes back as an ``AIMessage``;
    * adds ``bind_tools``, which converts tools with ``convert_to_openai_tool``
      and binds them (plus an optional ``tool_choice``) onto the runnable;
    * adds ``with_structured_output``, which pipes the bound model into an
      output parser chosen from ``method`` and the kind of ``schema``.
    """

    async def ainvoke(
        self,
        input: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AIMessage:
        """Generate asynchronously for one input and wrap the text in a message.

        Args:
            input: The prompt; converted with ``self._convert_input``.
            config: Optional runnable config, normalised via ``ensure_config``.
            stop: Optional stop sequences forwarded to ``agenerate_prompt``.
            **kwargs: Accepted for interface compatibility.

        Returns:
            An ``AIMessage`` whose content is the text of the first generation
            produced for the single prompt.
        """
        # NOTE(review): ``kwargs`` is not forwarded to ``agenerate_prompt``, so
        # anything attached through ``bind`` / ``bind_tools`` (tools,
        # tool_choice, response_format) never reaches the request on this
        # async path — confirm this is intended.
        config = ensure_config(config)
        llm_result = await self.agenerate_prompt(
            [self._convert_input(input)],
            stop=stop,
            callbacks=config.get("callbacks"),
            tags=config.get("tags"),
            metadata=config.get("metadata"),
            run_name=config.get("run_name"),
            # run_id is removed from the config rather than just read
            run_id=config.pop("run_id", None),
        )
        raw_res = llm_result.generations[0][0].text
        return AIMessage(content=raw_res)

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]],
        *,
        tool_choice: Optional[
            Union[dict, str, Literal["auto", "none", "required", "any"], bool]
        ] = None,
        strict: Optional[bool] = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind tools (in OpenAI tool format) to this model.

        Args:
            tools: Tool definitions; each is passed through
                ``convert_to_openai_tool``.
            tool_choice: Falsy values leave ``tool_choice`` unset. A string
                other than ``"auto"``, ``"none"``, ``"any"`` or ``"required"``
                is treated as a function name; ``"any"`` and ``True`` are
                mapped to ``"required"``; a dict is checked against the names
                of the provided tools.
            strict: Forwarded to ``convert_to_openai_tool``.
            **kwargs: Extra keyword arguments passed on to ``bind``.

        Returns:
            The runnable returned by ``bind`` with ``tools`` (and possibly
            ``tool_choice``) attached.

        Raises:
            ValueError: If a dict ``tool_choice`` names a function that is not
                among ``tools``.
        """
        formatted_tools = [
            convert_to_openai_tool(tool, strict=strict) for tool in tools
        ]
        if tool_choice:
            if isinstance(tool_choice, str):
                # tool_choice is a tool/function name
                if tool_choice not in ("auto", "none", "any", "required"):
                    tool_choice = {
                        "type": "function",
                        "function": {"name": tool_choice},
                    }
                # 'any' is not natively supported by OpenAI API.
                # We support 'any' since other models use this instead of 'required'.
                if tool_choice == "any":
                    tool_choice = "required"
            elif isinstance(tool_choice, bool):
                # Only ``True`` can reach this branch (falsy values are skipped above).
                tool_choice = "required"
            elif isinstance(tool_choice, dict):
                tool_names = [
                    formatted_tool["function"]["name"]
                    for formatted_tool in formatted_tools
                ]
                if tool_choice["function"]["name"] not in tool_names:
                    raise ValueError(
                        f"Tool choice {tool_choice} was specified, but the only "
                        f"provided tools were {tool_names}."
                    )
            kwargs["tool_choice"] = tool_choice
        return super().bind(tools=formatted_tools, **kwargs)

    def with_structured_output(
        self,
        schema: Optional[_DictOrPydanticClass] = None,
        *,
        method: Literal[
            "function_calling", "json_mode", "json_schema"
        ] = "function_calling",
        include_raw: bool = False,
        strict: Optional[bool] = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, _DictOrPydantic]:
        """Build a runnable that returns output shaped like ``schema``.

        Args:
            schema: Dict schema or pydantic model class. Required unless
                ``method`` is ``"json_mode"``.
            method: ``"function_calling"`` binds ``schema`` as a tool,
                ``"json_mode"`` binds ``response_format={"type": "json_object"}``,
                ``"json_schema"`` binds the result of
                ``_convert_to_openai_response_format``.
            include_raw: When true the result is a mapping with ``raw``,
                ``parsed`` and ``parsing_error`` keys instead of the parsed
                value alone.
            strict: Forwarded to tool / response-format conversion. Not allowed
                with ``"json_mode"``; defaults to ``True`` for ``"json_schema"``.
            **kwargs: Must be empty.

        Returns:
            ``llm | output_parser``, or the raw-preserving variant when
            ``include_raw`` is true.

        Raises:
            ValueError: On unsupported ``kwargs``, ``strict`` combined with
                ``"json_mode"``, a missing ``schema`` where one is required, or
                an unrecognised ``method``.
        """
        if kwargs:
            raise ValueError(f"Received unsupported arguments {kwargs}")
        if strict is not None and method == "json_mode":
            raise ValueError(
                "Argument `strict` is not supported with `method`='json_mode'"
            )
        is_pydantic_schema = _is_pydantic_class(schema)
        if method == "function_calling":
            if schema is None:
                raise ValueError(
                    "schema must be specified when method is not 'json_mode'. "
                    "Received None."
                )
            tool_name = convert_to_openai_tool(schema)["function"]["name"]

            # ``strict`` is passed on so it is honoured on this path as well.
            # No ``tool_choice`` is set here, so nothing forces the model to
            # call the tool.
            llm = self.bind_tools([schema], strict=strict)
            if is_pydantic_schema:
                output_parser: Runnable = PydanticToolsParser(
                    tools=[schema],  # type: ignore[list-item]
                    first_tool_only=True,  # type: ignore[list-item]
                )
            else:
                output_parser = JsonOutputKeyToolsParser(
                    key_name=tool_name, first_tool_only=True
                )
        elif method == "json_mode":
            llm = self.bind(response_format={"type": "json_object"})
            output_parser = (
                PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
                if is_pydantic_schema
                else JsonOutputParser()
            )
        elif method == "json_schema":
            if schema is None:
                raise ValueError(
                    "schema must be specified when method is not 'json_mode'. "
                    "Received None."
                )
            strict = strict if strict is not None else True
            response_format = _convert_to_openai_response_format(schema, strict=strict)
            llm = self.bind(response_format=response_format)
            if is_pydantic_schema:
                output_parser = _oai_structured_outputs_parser.with_types(
                    output_type=cast(type, schema)
                )
            else:
                output_parser = JsonOutputParser()
        else:
            # Without this branch ``llm`` / ``output_parser`` would be unbound
            # and the code below would fail with an UnboundLocalError.
            raise ValueError(
                "Unrecognized method argument. Expected one of 'function_calling', "
                f"'json_mode' or 'json_schema'. Received: '{method}'"
            )

        if include_raw:
            # On success: add ``parsed`` (parser applied to ``raw``) and a None
            # ``parsing_error``.
            parser_assign = RunnablePassthrough.assign(
                parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
            )
            # Fallback: ``parsed`` becomes None; the exception is passed along
            # under the ``parsing_error`` key.
            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
            parser_with_fallback = parser_assign.with_fallbacks(
                [parser_none], exception_key="parsing_error"
            )
            return RunnableMap(raw=llm) | parser_with_fallback
        else:
            return llm | output_parser

In [287]:
# Sample "summarize with sources and images" prompt: instructions, a user
# question, five source documents keyed by URL, the list of source links and a
# list of image URLs. Not referenced by any other cell visible in this notebook.
test_prompt = """
Summarize the following documents in relation to the user's question.
Make sure to include the following sources of the information used to create the summary.
Include the markdown-formatted URLs of the images used to create the summary in your response.

User Question:
What are some women's shoe fashion trends?

Source Documents:
https://www.whowhatwear.com/fashion/fall/fall-shoe-trends-2024:
These Are the 7 Most-Wanted Shoe Trends for Fall 2024 | Who What Wear These Are the 7 Most-Wanted Shoe Trends for Fall 2024 Continue on to get your fill of the major fall 2024 shoe trends and shop the pairs I love most. Bold, sculptural hardware was a theme across the fall collections, and designers each had their own unique take on these statement shoes, whether that was Tory Burch's popular Pierced Pumps or the new chain-link slingbacks from Gucci that everyone's talking about. The Carven runway featured one shoe style I haven't gotten my mind off of, and that is square-toe mules.
https://avecsport.com/blog/the-best-trends-in-athleisure-wear-2024:
Athleisure outfits. Whether you're looking to pair a casual top with leggings, or you would prefer sporting a warmer hoodie and joggers combination, with athleisure trends constantly developing, we've listed a range of men's and women's athleisure wear trends for 2024. Women's athleisure trends
https://www.marieclaire.com/fashion/summer-2024-accessory-trends/:
Fashion; These Key Summer 2024 Accessory Trends Inspire Effortless Style . From slouchy sling bags to '80s-era button earrings, these accessories encourage a laid-back fashion ethos.
https://www.lyv-on.com/blog/top-10-sustainable-fashion-trends-in-2024:
Top 10 Sustainable Fashion Trends in 2024 Sustainable fashion continues to rise in popularity as consumers and brands alike prioritize environmental responsibility. Organic cotton remains a staple in sustainable fashion due to its reduced environmental impact. These materials are produced through sustainable processes that use less water and energy compared to conventional textiles, making them a green choice for eco-conscious consumers. This model reduces waste and encourages the reuse of materials, creating a more sustainable fashion ecosystem. These dyes are less harmful to the environment and often produce beautiful, unique color variations, making them a popular choice for eco-friendly fashion. As sustainable fashion continues to evolve, these trends highlight the industry's commitment to reducing its environmental impact while offering stylish and high-quality products. Fashion Sustainable Fashion
https://www.thetrendspotter.net/street-style-from-paris-fashion-week-aw24/:
Photos by The Style Stalker. Paris Fashion Week AW24 street style brought us plenty of inspiration with so many more eye-catching trends. With freezing conditions comes layering. We saw plenty of black and cream maxi skirts worn with long coats, structured blazers, and trench coats. Fashion-goers paid attention to unusual textures, with fringed

Sources Links:
https://www.whowhatwear.com/fashion/fall/fall-shoe-trends-2024
https://avecsport.com/blog/the-best-trends-in-athleisure-wear-2024
https://www.marieclaire.com/fashion/summer-2024-accessory-trends/
https://www.lyv-on.com/blog/top-10-sustainable-fashion-trends-in-2024
https://www.thetrendspotter.net/street-style-from-paris-fashion-week-aw24/

Image URLs:
http://localhost:9002/fashion-images/0323544b-4129-4205-8ce0-5d40b200417b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20241024%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241024T143545Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=6f944c6890b217fbad7c68542109b49eacfd90f613eb55b1fcbf4dfb3139c64c
http://localhost:9002/fashion-images/293d6a43-c9da-488d-87f9-50f2b60a7c9f?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20241024%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241024T143607Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=eeb59479abd994c7811f2d10433c43dce68fb184fda351a78d2100e24f138785
http://localhost:9002/fashion-images/04933fcb-ce97-43c2-a91b-c098274fb66b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20241024%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241024T145546Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=84159d5a75bea7ae661920a2bd7b03cdf50b6281d85e3a380324ba074a14c34f
http://localhost:9002/fashion-images/2cd35b25-e541-4eb0-b5c4-a68ff1bf6fb7?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20241024%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241024T145609Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=db7019c30f98b626542c2d77f570b832957edb8497460ae047f0e5d8abf08e94
http://localhost:9002/fashion-images/49b280dc-5e4d-4559-b85d-50e72c5387a5?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20241024%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241024T145611Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=2e4cbc39de2c2bd4a38b1afabc5892fcacea772e73cc6afeb21a150f1ce70ccd

Please provide a concise and informative summary that directly addresses the question.
Provide the sources at the bottom in a separate sources section.

SUMMARY WITH SOURCES AND IMAGES:
"""

In [288]:
# Plain summarization prompt: one instruction line, a TEXT section and a
# trailing "SUMMARY:" cue. Only referenced by the commented-out
# ``structured_client.ainvoke`` experiment below.
# NOTE(review): "folowing" in the first line of the prompt is a typo, and the
# instruction mentions "the question" although the prompt contains none. Both
# are left untouched because the string is sent to the model verbatim.
summarize_test_prompt = """
Given the folowing text, please provide a concise and informative summary that directly addresses the question.

TEXT:
There were also some cracks within Harris's campaign operation. The outreach to Black, Latino and working-class voters in swing states came too late and her message to those voters wasn't clear enough, several Democrats said.

Rep. Adam Smith (D., Wash.) said that Harris and Democrats appeared too close to the party's progressive flank. 

“The extreme left is leading us into a ditch,” Smith said, citing movements to defund the police and to liberalize border policy. “The second problem, of course, is that Harris chose not to distance herself from Biden.”

The political ground that Harris forfeited was expansive. Women as well as men shifted toward the Republican Party, compared with their preferences in 2020. Harris gave up a bit of her party's advantage among college-educated voters while losing substantially among voters without a four-year degree, who account for almost 60% of the electorate. Black voters doubled their support for Trump to 16%, while Latino support grew by 6 points, to 41%. Harris also lost ground among voters with less than $100,000 in household income while making gains among the smaller group of voters who earn more than that amount.

In Michigan on the eve of the election, one vivid sign the Democrats were in trouble came at a rally thrown by vice presidential nominee Tim Walz, held at downtown Detroit's Hart Plaza. The venue can accommodate thousands of people, but the crowd filled only a fraction of the space, according to one volunteer. At one point, campaign staffers asked people to cluster in bleachers that were in view of a camera, so it would give the impression of a full crowd.

Walz and Michigan Gov. Gretchen Whitmer were among those who delivered brief remarks to a lackluster reception. The rally also featured entertainers, including REM frontman Michael Stipe and Jon Bon Jovi, who sang hits that included a 1992 release that served as a rallying cry: “Keep the Faith.”

On Tuesday night, wealthy Democratic donors and operatives, who had been getting positive updates from the campaign throughout the day, watched in horror at the Conrad Hotel in Washington as election results came in. Many who had been invited to a VIP gathering at Howard University, where Harris's campaign held its election night party, decided to stay at the bar or go back to their hotel rooms to mourn alone.

“'We just got our asses kicked' was the sentiment of the night,” one of the attendees said, adding that although some were starting the finger pointing, Trump's victory was so resounding that most understood it was a larger problem that went beyond Harris's campaign. “We can't nitpick.”

SUMMARY:
"""

In [289]:
# client = VLLMClient(
#     openai_api_key="EMPTY",
#     openai_api_base="http://gort:8000/v1",
#     # model_name="mistralai/Mistral-7B-Instruct-v0.3",
#     model_name="meta-llama/Llama-3.1-8B-Instruct",
#     temperature=0.0,
# )
# # NOTE: vLLM supports named function calling and auto tool choice in the chat completion API.
# # The `tool_choice="required"` option is not yet supported, but it is on the roadmap.
# structured_client = client.with_structured_output(
#     schema=Summary, include_raw=True, strict=False
# )

In [290]:
# res = await structured_client.ainvoke(summarize_test_prompt)
# print(res)

In [291]:
# Extraction prompt used as the final user message of the chat-completion
# request below: role instructions, the page URL, a raw (truncated) fragment of
# the page's "ecommerce.impressions" data, and a trailing cue for the answer.
# Whitespace inside the string is part of the prompt and is kept exactly.
clothing_parser_prompt = """
  You are an expert fashion blogger and stylist and software engineer.
  Your goal is to parse the content of a web page to extract information about a clothing item displayed on the page.
  Return only one parsed clothing item and nothing else.
  When constructing the clothing item, make sure to use the URL of the page as the source of the clothing item.

  Web Page URL:
  https://www.jcrew.com/plp/mens/?trending=bestSeller

  Web Page Content:
            "ecommerce": {
                "impressions": [
                    
                    {
                        "name": "Men's Horsebit 1953 loafer",
                        "id": "307929BLM001000",
                        "price": "990.0",
                        
                        "category": "Gifts/Gifts-for-Men",
                        
                        "variant": "black leather",
                        "list": "ProductGrid",
                        "position": "1",
                        "dimension5": "non-sale",
                        "dimension24": "/ca/gifts/gifts-for-men-c-gifts-for-him"
                    },
                    
                    {
                        "name": "Gucci B medium shoulder bag",
                        "id": "817420FAD8Q9760",
                        "price": "2300.0",
                        
                        "category": "Gifts/Gifts-for-Men",
                        
                        "variant": "beige and dark brown canvas",
                        "list": "ProductGrid",
                        "position": "2",
                        "dimension5": "non-sale",
                        "dimension24": "/ca/gifts/gifts-for-men-c-gifts-for-him"
                    },
                    
                    {
                        "name": "GG cashmere shawl",
                        "id": "8123174GABX2579",
                        "price": "995.0",
                        
                        "category": "Gifts/Gifts-for-Men",
                        
                        "variant": "beige",
                        "list": "ProductGrid",
                        "position": "3",
                        "dimension5": "non-sale",
                        "dimension24": "/ca/gifts/gifts-for-men-c-gifts-for-him"
                    },
                    
  
  Parsed Clothing Items:
"""

In [292]:
import json


from openai import OpenAI


# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://gort:8000/v1"

client = OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Ask the server which models it exposes and use the first one reported.
models = client.models.list()
if not models.data:
    # Fail with an actionable message instead of an IndexError on ``data[0]``.
    raise RuntimeError(
        f"No models are being served at {openai_api_base}; cannot pick a model."
    )
model = models.data[0].id


# tools = [
#     {
#         "type": "function",
#         "function": {
#             "name": "summarize_text",
#             "description": "Summarize the text provided",
#             "parameters": {
#                 "type": "object",
#                 "properties": {
#                     "text": {
#                         "type": "string",
#                         "description": "The text to summarize",
#                     },
#                 },
#                 "required": ["text"],
#             },
#         },
#     }
# ]

# tools = [
#     {
#         "type": "function",
#         "function": {
#             "name": "extract_clothing_items",
#             "description": "Given the text, extract the clothing items mentioned.",
#             "parameters": {
#                 "type": "array",
#                 "items": {
#                     "type": "object",
#                     "properties": {
#                         "name": {"type": "string"},
#                         "price": {"type": "number"},
#                         "image_url": {"type": "string"},
#                         "link": {"type": "string"},
#                     },
#                     "required": ["name"],
#                 },
#             },
#         },
#     }
# ]

# Single OpenAI-format tool definition: a function named
# "extract_clothing_item" taking an object with four optional-or-required
# scalar fields, of which only "name" is required.
tools = [
    {
        "type": "function",
        "function": {
            "name": "extract_clothing_item",
            "description": "Given the text, extract one clothing item mentioned.",
            "parameters": {
                "type": "object",
                # Each property only declares its JSON type; insertion order
                # follows the tuple below.
                "properties": {
                    field: {"type": json_type}
                    for field, json_type in (
                        ("name", "string"),
                        ("price", "number"),
                        ("image_url", "string"),
                        ("link", "string"),
                    )
                },
                "required": ["name"],
            },
        },
    }
]


# Conversation sent to the model: a short greeting exchange followed by the
# clothing-parsing prompt as the final user turn.
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("user", "Hi! How are you doing today?"),
        ("assistant", "I'm doing well! How can I help you?"),
        ("user", clothing_parser_prompt),
    )
]

# One chat-completion request with the tool definition attached; the full
# response object is printed for inspection.
chat_completion = client.chat.completions.create(
    model=model,
    messages=messages,
    tools=tools,
)
print(chat_completion)

ChatCompletion(id='chat-e5e6c266af8f4be3896b47c882e06e91', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='chatcmpl-tool-db31567186bb4d4997ee12398d55740d', function=Function(arguments='{"name": "Men\'s Horsebit 1953 loafer", "price": 990.0, "image_url": "https://www.jcrew.com/plp/mens/?trending=bestSeller", "link": "https://www.jcrew.com/plp/mens/?trending=bestSeller"}', name='extract_clothing_item'), type='function')]), stop_reason=128008)], created=1731076885, model='meta-llama/Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=79, prompt_tokens=767, total_tokens=846, completion_tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None)


In [293]:
# Show only the argument string of the first tool call in the first choice
# (the JSON text the model produced for ``extract_clothing_item``).
print(chat_completion.choices[0].message.tool_calls[0].function.arguments)


{"name": "Men's Horsebit 1953 loafer", "price": 990.0, "image_url": "https://www.jcrew.com/plp/mens/?trending=bestSeller", "link": "https://www.jcrew.com/plp/mens/?trending=bestSeller"}


### Small Model vLLM Tool Calling
Llama 3.1 works very well for SIMPLE tool calling with vLLM. It does NOT work well for more complex tool calling that involves long lists of objects.
Also, to support the large amount of parallelism needed for a real-time web crawler, the context provided to the model at every step must be kept small, because a large context window in vLLM requires an enormous amount of VRAM for the attention (KV) cache.

- Use Llama 3.1 8B for both tool calling and chat completion.