# Extract metadata from a search query using `instructor`

Using `litellm` should make this example LLM-provider-independent.

## Setup

In [None]:
from enum import Enum
from datetime import date
from textwrap import dedent
from typing import Optional

import instructor
import litellm

from dotenv import load_dotenv
from litellm import completion
from pydantic import BaseModel

In [None]:
# Load environment variables from a local `.env` file, if one exists.
# NOTE(review): presumably this supplies the LLM provider and Langfuse
# credentials used below - confirm which variables are required.
load_dotenv()

In [None]:
# Toggle: when True, the following cell registers the "langfuse" callbacks on
# litellm; set to False to skip that registration.
use_langfuse = True

In [None]:
# When tracing is enabled, register "langfuse" as the callback for both
# successful and failed litellm calls. Each attribute gets its own list.
if use_langfuse:
    for _hook in ("success_callback", "failure_callback"):
        setattr(litellm, _hook, ["langfuse"])

In [None]:
# Wrap litellm's `completion` function with instructor. The resulting client
# is called below with a `response_model=` argument.
client = instructor.from_litellm(completion)

## User query example

In [None]:
# Free-text user query that gets embedded into the prompt below.
# NOTE(review): "mvies" looks like a deliberate typo to show that the model
# still maps it to the movies dataset - confirm before "fixing" it.
query = "find all mvies from last Monday onwards"

In [None]:
# Closed set of datasets a query may target; used as the type of
# `Query.dataset`, so the extracted value must be one of these members.
class Dataset(Enum):
    SONGS = "songs"
    MOVIES = "movies"
    BOOKS = "books"

In [None]:
# Structured metadata extracted from a free-text search query. Passed as
# `response_model` to the instructor client further down.
# NOTE(review): documented with `#` comments rather than a docstring, since a
# class docstring may end up in the schema sent to the model - confirm.
class Query(BaseModel):
    # Which dataset the user is asking about.
    dataset: Dataset
    # Lower bound of the requested date range, or None when absent.
    # NOTE(review): `Optional[...]` without a default is required-but-nullable
    # under Pydantic v2 - confirm that is the intended contract.
    start_date: Optional[date]
    # Upper bound of the requested date range, or None when open-ended.
    end_date: Optional[date]

In [None]:
# Weekday names indexed by `date.weekday()` (Monday == 0 ... Sunday == 6).
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Read the clock once so the weekday name and the ISO date in the prompt always
# describe the same day, even if this cell happens to run across midnight.
today = date.today()

# Stating today's weekday and date in the prompt gives the model a reference
# point for relative expressions such as "last Monday" in the query.
user_prompt = f"""
Today is {days[today.weekday()]}, {today}. Process the following query:

{query}
"""

In [None]:
# Show the fully rendered prompt before it is sent to the model.
print(user_prompt)

In [None]:
# Single-turn conversation: the whole rendered prompt is one user message.
messages = [{"role": "user", "content": user_prompt}]

# `response_model` is the key argument here: it asks the instructor-wrapped
# client for a `Query` instance (checked by the assertions in the next cell)
# instead of a raw chat completion.
resp = client.chat.completions.create(
    model="azure/gpt-35-turbo-1106",
    messages=messages,
    response_model=Query,
    max_tokens=1024,
)

In [None]:
# Sanity-check the structured response against what the query asked for.
from datetime import timedelta

# Derive "last Monday" from the current date instead of hard-coding it, so the
# check keeps passing whenever the notebook is re-run. `weekday()` is 0 on a
# Monday, in which case the Monday one week earlier is expected.
# NOTE(review): whether "last Monday" on a Monday means today or a week ago is
# ambiguous; the model may legitimately pick the other reading.
today = date.today()
last_monday = today - timedelta(days=today.weekday() or 7)

assert isinstance(resp, Query)
assert resp.dataset == Dataset.MOVIES
assert resp.start_date == last_monday
assert resp.end_date is None  # "onwards" means the range has no upper bound

In [None]:
# Display the parsed response object as the cell output.
resp

## WHY?

- we made the LLM output structured and controllable (not a stochastic process)
  - enables building proper control-flow and error handling (this is **HUGE**)
- building on top of Pydantic and its schema validation and its ecosystem

## TODO?

- add validation directly in Pydantic: the model is vague about the week day (e.g. which date "last Monday" resolves to)

## Suggested exercise

1. set up your environment and run this notebook
2. expand the Query model to include more fields, e.g., "author"
3. define a custom validator: https://python.useinstructor.com/concepts/reask_validation/#pydantic