In [1]:
# Please install OpenAI SDK first: `pip3 install openai`
from openai import OpenAI
import os
from dotenv import load_dotenv
import json

load_dotenv()

True

In [2]:
DEEPSEEK_API = os.getenv("DEEPSEEK_API")  # Load the API key from the environment variable

### Deepseek from `OpenAI` Trial

In [3]:
client = OpenAI(api_key=DEEPSEEK_API, base_url="https://api.deepseek.com")

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "你是誰？我是你爸？"},
    ],
    stream=False
)

print(response.choices[0].message.content)

哈哈，這個開場白有點突然啊！不過如果你是在開玩笑的話——「爸，我記得你上次說要給我零用錢，是不是該兌現了？」（開個小玩笑～）  

其實我是個AI助手，專門負責回答問題、提供資訊或陪你聊聊天。如果有什麼需要幫忙的，儘管告訴我！ 😄  

（如果剛才的稱呼讓你覺得冒犯，也可以直接說哦，我會調整的～）


In [14]:
# Extract features using OpenAI
def extract_features(text: str) -> dict:
    prompt = f"""
    Extract the following features from the input text as a JSON object:
    - Date (ISO format, e.g., 2025-05-02; use today's date if not specified)
    - Category (either 'Income' or 'Expense', guess if not clear)
    - Description (short summary of the transaction)
    - Price (numeric value, assume USD if no currency specified)

    Input: {text}

    Return a JSON object with these fields. If a field cannot be determined, use reasonable defaults or null.
    """

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that extracts structured data from text."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

In [15]:
text = "2025-05-02, 1000, Salary"
features = extract_features(text)
print(features)

```json
{
  "Date": "2025-05-02",
  "Category": "Income",
  "Description": "Salary",
  "Price": 1000
}
```


### Structured LLM Output by `Langchain`

Reference:
1. How to return structured data from a model: <br> https://python.langchain.com/docs/how_to/structured_output/#the-with_structured_output-method
2. Structured outputs: <br> https://python.langchain.com/docs/concepts/structured_outputs/

In [4]:
import os
from langchain.chat_models import init_chat_model

os.environ["DEEPSEEK_API_KEY"] = DEEPSEEK_API  # Set the API key in the environment variable

llm = init_chat_model("deepseek-chat", model_provider="deepseek")

llm.invoke("What is the capital of France?")

AIMessage(content='The capital of France is **Paris**. It is one of the most famous and visited cities in the world, known for landmarks like the Eiffel Tower, the Louvre Museum, and Notre-Dame Cathedral.  \n\nWould you like recommendations for things to do in Paris? 😊', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 56, 'prompt_tokens': 10, 'total_tokens': 66, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}, 'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 10}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0425fp8', 'id': '3bd16abd-d114-44e2-a3e9-7fb674e5f229', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--1e2bf4fe-ee5a-48fb-907d-c04d339c2b22-0', usage_metadata={'input_tokens': 10, 'output_tokens': 56, 'total_tokens': 66, 'input_token_details': {'cache_read': 0}, 'output_token_details': {}})

In [12]:
from typing import Optional
from pydantic import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


structured_llm = llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")

Joke(setup="Why don't cats play poker in the wild?", punchline='Because there are too many cheetahs!', rating=7)

In [None]:
text_input = "2025-05-02, 1000, Salary"

class FeaturesFormatter(BaseModel):
    """Feature formatter to extract features from text."""

    date: str = Field(description="Date of the transaction in ISO format (e.g., 2025-05-02)")
    category: str = Field(description="Category of the transaction")
    description: str = Field(description="Description of the transaction")
    price: float = Field(description="Price of the transaction in HKD")

# Initialize the LLM with structured output
structured_llm = llm.with_structured_output(FeaturesFormatter)

# Function to extract features from user input text
def extract_bookkeeping_features(text: str) -> dict:
    structured_output = structured_llm.invoke(text)
    return structured_output.model_dump()

# Example usage
user_input = "Eat five guys today for 59 HKD"
features = extract_bookkeeping_features(user_input)
print(json.dumps(features, indent=4))

{
    "date": "2023-10-26",
    "category": "Food & Dining",
    "description": "Eat five guys today",
    "price": 59.0
}
