In [1]:
# https://github.com/groq/groq-api-cookbook/blob/main/tutorials/structured-output-instructor/structured_output_instructor.ipynb

In [2]:
import os
import instructor
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from groq import Groq
from pprint import pprint

# Groq-hosted model used for the request in this notebook.
model = "llama-3.3-70b-versatile"

# Load the Groq API key from .env file
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast with a clear message: os.environ.get returns None when the variable
# is missing, and slicing None below would raise an unhelpful TypeError.
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set - add it to your .env file or environment."
    )

# Echo only the leading "gsk_" marker. Cell outputs are saved with the notebook
# and get committed/shared, so no real key characters should be printed.
print(f"GROQ API Key exists and begins {GROQ_API_KEY[:4]}...")

GROQ API Key exists and begins gsk_**********...


In [3]:
# Describe the desired output schema using pydantic models
# Pydantic is a coercion and validation library, so we will be able to gracefully handle the output knowing its structure.
class UserInfo(BaseModel):
    # Target schema for the extraction: four fields, each with a default so an
    # instance can still be built when a value is absent.
    # NOTE: documented with `#` comments on purpose - a class docstring would be
    # emitted by pydantic as the schema's description and so change the schema.

    # Free-text person name; "Unknown" when not supplied.
    name: str = Field("Unknown", description="User's name, default is 'Unknown'")

    # Age as an integer; 0 is the "not specified" sentinel.
    age: int = Field(0, description="User's age, default is 0 if not specified")

    # Email address as a plain string (no format validation is declared here).
    email: str = Field("Unknown", description="User's email, default is 'Unknown'")

    # Free-text location; "Unknown" when not supplied.
    location: str = Field(
        "Unknown", description="User's location, default is 'Unknown'"
    )


# Sample input for the extraction: a one-sentence bio followed by an email
# address on its own line. Blank lines are spelled out as explicit "\n" pieces.
text = (
    "\n"
    "John Doe, a 42-year-old software engineer from Southampton, "
    "has been working with large language models for several years.\n"
    "\n"
    "john.doe@example.com\n"
    "\n"
)

# Patch Groq() with instructor so that `create` accepts `response_model`.
# Groq() is built without arguments, so it presumably picks the API key up
# from the environment populated by load_dotenv() - TODO confirm.
client = instructor.from_groq(Groq(), mode=instructor.Mode.JSON)

# Call the API. The result is a UserInfo instance rather than a raw completion.
user_info = client.chat.completions.create(
    model=model,
    response_model=UserInfo,  # Specify the response model
    messages=[
        {
            "role": "system",
            # Ask the model to omit unknown fields rather than "leave them
            # blank": UserInfo declares defaults ('Unknown' / 0), and a blank
            # value for the integer `age` field would not validate.
            "content": "Your job is to extract user information from the given text to populate the UserInfo model - if a field is not stated in the text, omit it so that its default value is used.",
        },
        {"role": "user", "content": text},
    ],
    temperature=0.0,  # no sampling randomness requested
)
# Show the class of the returned object, then one "Label: value" line per
# extracted field, in declaration order.
print(type(user_info))
for label in ("Name", "Age", "Email", "Location"):
    # Each label is the capitalised attribute name on the model.
    print(f"{label}: {getattr(user_info, label.lower())}")

<class '__main__.UserInfo'>
Name: John Doe
Age: 42
Email: john.doe@example.com
Location: Southampton


In the example above, we've defined a simple pydantic model `UserInfo` that specifies a person's name (as a string), age (as an integer), email (as a string), and location (as a string). The `instructor` library ensures that the Groq model's output adheres to this schema, so the response you get back is a validated `UserInfo` instance rather than raw text. This eliminates the need for manual validation and reduces the likelihood of errors creeping into your data.