In [1]:
# Please install OpenAI SDK first: `pip3 install openai`
from openai import OpenAI
import os
from dotenv import load_dotenv
import json
from typing import Optional
from pydantic import BaseModel, Field
import os
from langchain.chat_models import init_chat_model

load_dotenv()

True

In [5]:
DEEPSEEK_API = os.getenv("DEEPSEEK_API")  # Load the API key from the environment variable

### Deepseek from `OpenAI` Trial

In [3]:
client = OpenAI(api_key=DEEPSEEK_API, base_url="https://api.deepseek.com")

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "你是誰？我是你爸？"},
    ],
    stream=False
)

print(response.choices[0].message.content)

哈哈，這個開場白有點突然啊！不過如果你是在開玩笑的話——「爸，我記得你上次說要給我零用錢，是不是該兌現了？」（開個小玩笑～）  

其實我是個AI助手，專門負責回答問題、提供資訊或陪你聊聊天。如果有什麼需要幫忙的，儘管告訴我！ 😄  

（如果剛才的稱呼讓你覺得冒犯，也可以直接說哦，我會調整的～）


In [14]:
# Extract features using OpenAI
def extract_features(text: str) -> dict:
    prompt = f"""
    Extract the following features from the input text as a JSON object:
    - Date (ISO format, e.g., 2025-05-02; use today's date if not specified)
    - Category (either 'Income' or 'Expense', guess if not clear)
    - Description (short summary of the transaction)
    - Price (numeric value, assume USD if no currency specified)

    Input: {text}

    Return a JSON object with these fields. If a field cannot be determined, use reasonable defaults or null.
    """

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that extracts structured data from text."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

In [15]:
text = "2025-05-02, 1000, Salary"
features = extract_features(text)
print(features)

```json
{
  "Date": "2025-05-02",
  "Category": "Income",
  "Description": "Salary",
  "Price": 1000
}
```


### Structured LLM Output by `Langchain`

Reference:
1. How to return structured data from a model: <br> https://python.langchain.com/docs/how_to/structured_output/#the-with_structured_output-method
2. Structured outputs: <br> https://python.langchain.com/docs/concepts/structured_outputs/

In [6]:
import os
from langchain.chat_models import init_chat_model

os.environ["DEEPSEEK_API_KEY"] = DEEPSEEK_API  # Set the API key in the environment variable

llm = init_chat_model("deepseek-chat", model_provider="deepseek")

# llm.invoke("What is the capital of France?")

In [None]:
text_input = "2025-05-02, 1000, Salary"

class FeaturesFormatter(BaseModel):
    """Feature formatter to extract features from text."""

    date: str = Field(description="Date of the transaction in ISO format (e.g., 2025-05-02)")
    category: str = Field(description="Category of the transaction")
    description: str = Field(description="Description of the transaction")
    price: float = Field(description="Price of the transaction in HKD")

# Initialize the LLM with structured output
structured_llm = llm.with_structured_output(FeaturesFormatter)

# Function to extract features from user input text
def extract_bookkeeping_features(text: str) -> dict:
    structured_output = structured_llm.invoke(text)
    return structured_output.model_dump()

# Example usage
user_input = "Eat five guys today for 59 HKD"
features = extract_bookkeeping_features(user_input)
print(json.dumps(features, indent=4))

In [2]:
user_input_list = [
    "Paid $25 for a taxi ride this afternoon",
    "Bought concert tickets for 80 dollars yesterday",
    "Spent 12 on coffee and a donut this morning",
    "Got groceries for $45.50 on 2025-05-10",
    "Went to the movies last night and spent 30",
    "Bought a new shirt for 20 bucks today",
    "Had dinner at a restaurant for $75 last Friday",
    "Subscribed to a streaming service for 15 dollars this month",
    "Filled up gas for 40 on 2025-05-08",
    "Bought a book today"
]

for user_input in user_input_list:
    features = extract_bookkeeping_features(user_input)
    print(json.dumps(features, indent=4))
    print(f"{'-' * 20}")

NameError: name 'extract_bookkeeping_features' is not defined