In [16]:
import multiprocessing
from langchain_community.chat_models import ChatLlamaCpp


# Path to the local GGUF model weights (relative to the notebook's working directory).
local_model = "../models/Hermes-2-Pro-Llama-3-8B-Q8.gguf"

# https://python.langchain.com/docs/integrations/chat/llamacpp/#instantiation
llm: ChatLlamaCpp = ChatLlamaCpp(
    temperature=0.5,
    model_path=local_model,
    n_ctx=10000,
    n_gpu_layers=8,
    n_batch=300,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    max_tokens=512,
    # Leave one core free for the rest of the system, but never request fewer
    # than one thread: `cpu_count() - 1` evaluates to 0 on a single-core host.
    n_threads=max(1, multiprocessing.cpu_count() - 1),
    repeat_penalty=1.5,
    top_p=0.5,
    verbose=False,
    # `chat_format` is an option of the underlying llama_cpp.Llama constructor,
    # so it is forwarded through `model_kwargs`.
    # NOTE(review): ChatLlamaCpp does not appear to declare a `chat_format`
    # field of its own, so a bare `chat_format=...` keyword is presumably
    # dropped silently -- confirm against the installed langchain_community.
    # "chatml-function-calling" is llama-cpp-python's generic tool-calling
    # format; `with_structured_output` relies on tool calls, and with the
    # previous setting the structured-output cells in this notebook printed None.
    model_kwargs={"chat_format": "chatml-function-calling"},
)

In [17]:
# Conversation as (role, content) tuples: a system instruction followed by the
# English sentence the model is asked to translate.
messages = [
    ("system", "You are a helpful assistant that translates English to French. Translate the user sentence."),
    ("human", "I love programming."),
]

# Keep the reply in `ai_msg`; ending the cell on the bare name displays it.
ai_msg = llm.invoke(messages)
ai_msg

AIMessage(content="J'aime programmer.", additional_kwargs={}, response_metadata={'finish_reason': 'stop'}, id='run-d2ec8553-a5f3-452e-a934-86c32fdffe74-0')

In [18]:

from typing import Optional
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    # The docstring above doubles as the description of the Person schema that
    # is sent to the LLM, so its wording can help improve extraction results.
    #
    # Design notes for the fields below:
    # * every field is Optional with a None default, which lets the model
    #   decline to extract a value;
    # * every field carries a `description`, which the LLM uses -- a good
    #   description can help improve extraction results.

    name: Optional[str] = Field(
        description="The name of the person",
        default=None,
    )
    hair_color: Optional[str] = Field(
        description="The color of the person's hair if known",
        default=None,
    )
    height_in_meters: Optional[str] = Field(
        description="Height measured in meters",
        default=None,
    )

# Extraction prompt: a fixed system instruction, then the `{text}` template
# variable as the human turn.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. Only extract relevant "
            "information from the text. If you do not know the value of an "
            "attribute asked to extract, return null for the attribute's value.",
        ),
        # Reference examples could be slotted in at this position (see the
        # how-to about improving performance with reference examples):
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

# Sample sentence to run the extraction on.
text = "Alan Smith is 6 feet tall and has blond hair."

# Pipe the prompt into the LLM wrapped by `with_structured_output` for the
# Person schema, run it on the sample sentence, and print the result.
runnable = prompt | llm.with_structured_output(schema=Person)
result = runnable.invoke({"text": text})
print(result)


None


In [9]:
#from langchain_core.utils.function_calling import convert_to_openai_tool
from typing import Dict
from pydantic import BaseModel, Field


# Pydantic schema used as the structured-output target.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # The rating is optional and defaults to None when not supplied.
    rating: int | None = Field(
        description="How funny the joke is, from 1 to 10",
        default=None,
    )


# Alternative kept for reference -- build a dict schema from the class first:
# dict_schema = convert_to_openai_tool(Joke)
structured_llm = llm.with_structured_output(Joke)

# Declare the expected type of the result, then invoke and print it.
result: Dict | Joke
result = structured_llm.invoke("Tell me a joke about birds")
print(result)


None
