In [32]:
from typing import List

import instructor
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel

In [33]:
# load_dotenv comes from python-dotenv (imported above); the cell output shows it returned True.
# Presumably this is what supplies the API key that OpenAI() picks up later — confirm against your .env.
load_dotenv()

True

In [34]:
# Model name passed as `model=` to every completion request in this notebook.
OPENAI_MODEL = "gpt-4o-mini"

In [35]:
# Schema for one employee record. It is passed as `response_model` to the
# completion calls below, which then return instances of this class.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose — a docstring becomes part of the model's JSON schema description and
# is presumably forwarded to the LLM; confirm before adding one.
class Employee(BaseModel):
    name: str  # full name, e.g. 'John Doe'
    position: str  # job title, e.g. 'Software Engineer'
    department: str  # e.g. 'IT'
    hire_date: str  # plain string, not validated as a date (sample outputs came back as 'YYYY-MM-DD')
    salary: int  # whole number, e.g. 85000 for "$85,000"

In [36]:
# Wrap the OpenAI client with instructor; the wrapped client's
# chat.completions calls below accept a `response_model` argument.
client = instructor.from_openai(OpenAI())

## Extract a single entity

In [37]:
# Free-text description of one employee; sent as the user message in the next cell.
text = """
John Doe is a Software Engineer in the IT department. He was hired on January 15, 2020, and his current salary is $85,000.
"""

In [38]:
# Send the text as a single user message. With `response_model=Employee` the
# call returns an Employee instance (see the displayed value in the next cell).
employee = client.chat.completions.create(
    messages=[dict(role="user", content=text)],
    response_model=Employee,
    model=OPENAI_MODEL,
)

In [39]:
# Display the parsed Employee instance.
employee

Employee(name='John Doe', position='Software Engineer', department='IT', hire_date='2020-01-15', salary=85000)

## Extract multiple entities

In [40]:
# Free-text paragraph describing four employees; sent as the user message in the next cell.
text = """
John Doe is a Software Engineer in the IT department. He was hired on January 15, 2020, and his current salary is $85,000. Jane Smith works as a Marketing Manager in the Marketing department. She started her job on March 23, 2018, and her salary is $92,000. Emily Johnson, who is a Product Manager in the Product department, joined the company on July 11, 2019, and earns $105,000. Michael Brown is a Data Analyst in the Data Science department. He was hired on October 1, 2021, with a salary of $78,000."""

In [41]:
# Same request shape as the single-entity call, but `response_model` is
# List[Employee], so the result is iterable and yields one Employee per person.
employees = client.chat.completions.create(
    messages=[dict(role="user", content=text)],
    response_model=List[Employee],
    model=OPENAI_MODEL,
)

In [42]:
# Print each extracted record on its own line.
# The loop variable is deliberately not named `employee`: a `for` target stays
# bound after the loop finishes, so reusing that name would overwrite the
# notebook-level `employee` from the single-entity example with the last
# element of `employees`.
for extracted_employee in employees:
    print(extracted_employee)

name='John Doe' position='Software Engineer' department='IT' hire_date='2020-01-15' salary=85000
name='Jane Smith' position='Marketing Manager' department='Marketing' hire_date='2018-03-23' salary=92000
name='Emily Johnson' position='Product Manager' department='Product' hire_date='2019-07-11' salary=105000
name='Michael Brown' position='Data Analyst' department='Data Science' hire_date='2021-10-01' salary=78000


## View the raw LLM API response alongside the extracted data

See the instructor docs: [Returning the original completion (`create_with_completion`)](https://python.useinstructor.com/#returning-the-original-completion-create_with_completion)

In [43]:
# Single-employee sample text (identical to the input of the first example).
text = """
John Doe is a Software Engineer in the IT department. He was hired on January 15, 2020, and his current salary is $85,000.
"""

In [44]:
# create_with_completion returns a pair: the parsed Employee and the raw
# completion object it was parsed from. Both are inspected in the cells below.
employee, completion = client.chat.completions.create_with_completion(
    messages=[dict(role="user", content=text)],
    response_model=Employee,
    model=OPENAI_MODEL,
)

In [45]:
# dict() over the model gives a plain {field: value} mapping, as the output shows.
dict(employee)

{'name': 'John Doe',
 'position': 'Software Engineer',
 'department': 'IT',
 'hire_date': '2020-01-15',
 'salary': 85000}

In [46]:
# Top-level fields of the raw completion as a dict. The conversion is shallow:
# in the output, nested values such as Choice and CompletionUsage remain objects.
dict(completion)

{'id': 'chatcmpl-A1JeNBlORTDdGW4VmhlqhaPmoN4cL',
 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_saJHHHtHV4KbZfgW0iW4gw1i', function=Function(arguments='{"name":"John Doe","position":"Software Engineer","department":"IT","hire_date":"2020-01-15","salary":85000}', name='Employee'), type='function')]))],
 'created': 1724877227,
 'model': 'gpt-4o-mini-2024-07-18',
 'object': 'chat.completion',
 'service_tier': None,
 'system_fingerprint': 'fp_f33667828e',
 'usage': CompletionUsage(completion_tokens=30, prompt_tokens=122, total_tokens=152)}