# Use LLM for Metadata extraction

This demo uses a large language model for metadata extraction from text.

It calls a model hosted by OpenAI (`gpt-4o-mini`) through the OpenAI API to simplify setup. A local or self-hosted model can also be used.

In [None]:
!pip3 install openai==1.39.0 pydantic==2.8.2

In [None]:
import os

from ipython_secrets import get_secret

# Look up the secret named OPENAI_API_KEY and export it as an environment
# variable of the same name, so the key never appears in the notebook source.
os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY")

In [None]:
import os 
from openai import OpenAI 
from pydantic import BaseModel 
import json 
 
# Create client to call model.
# The key is read from the environment; a missing OPENAI_API_KEY raises KeyError here.
api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=api_key)

# Values a reader is most likely to change.
MODEL_NAME = "gpt-4o-mini"
SAMPLE_TEXT = "Eggs, like milk, form a typical food, inasmuch as they contain all the elements, in the right proportion, necessary for the support of the body. Their highly concentrated, nutritive value renders it necessary to use them in combination with other foods rich in starch (bread, potatoes, etc.). In order that the stomach may have enough to act upon, a certain amount of bulk must be furnished."


# Format response structure.
# Documented with comments rather than a docstring on purpose: pydantic copies a
# model's docstring into the generated JSON schema as its "description", which
# would change the schema that is sent to the model.
class TopicsResult(BaseModel):
    # Key topics extracted from the input text.
    topics: list[str]


# Function the model is forced to call; its parameters are the JSON schema of
# TopicsResult, so the returned arguments can be validated against that model.
function_definition = {
    "name": "get_topics",
    "description": "Extract the key topics from the text",
    # model_json_schema() returns the schema as a dict (schema_json() is
    # deprecated in pydantic v2 and needed an extra json.loads round trip).
    "parameters": TopicsResult.model_json_schema(),
}

# `tools` / `tool_choice` replace the deprecated `functions` / `function_call`
# request parameters; tool_choice pins the reply to a get_topics call.
response = client.chat.completions.create(
    model=MODEL_NAME,
    tools=[{"type": "function", "function": function_definition}],
    tool_choice={
        "type": "function",
        "function": {"name": function_definition["name"]},
    },
    messages=[
        {
            "role": "system",
            "content": "Extract key topics from the following text. Include no more than 3 key terms. Format response as a JSON object.",
        },
        {
            "role": "user",
            "content": SAMPLE_TEXT,
        },
    ],
)

# Parse and validate the tool-call arguments into a TopicsResult instance.
tool_calls = response.choices[0].message.tool_calls
if not tool_calls:
    # Fail with a clear message instead of an opaque error further down.
    raise RuntimeError(
        f"Model response did not contain the expected {function_definition['name']!r} tool call"
    )
content = TopicsResult.model_validate_json(tool_calls[0].function.arguments)

print(f"Topics: {content.topics}")